From 34e841fa7422eb07f5522406fb83942efd56ad40 Mon Sep 17 00:00:00 2001 From: John Scancella Date: Tue, 27 Jun 2017 10:10:07 -0400 Subject: [PATCH] refs #92 - check for blank lines in fetch file and throw error if there are any --- .../repository/bagit/reader/FetchReader.java | 22 ++++++++++++------- src/main/resources/MessageBundle.properties | 1 + .../bagit/reader/FetchReaderTest.java | 18 +++++++++++++++ .../fetchFiles/fetchWithBlankLines.txt | 7 ++++++ 4 files changed, 40 insertions(+), 8 deletions(-) create mode 100644 src/test/resources/fetchFiles/fetchWithBlankLines.txt diff --git a/src/main/java/gov/loc/repository/bagit/reader/FetchReader.java b/src/main/java/gov/loc/repository/bagit/reader/FetchReader.java index 594883b9e..3e0336dda 100644 --- a/src/main/java/gov/loc/repository/bagit/reader/FetchReader.java +++ b/src/main/java/gov/loc/repository/bagit/reader/FetchReader.java @@ -23,6 +23,7 @@ public final class FetchReader { private static final Logger logger = LoggerFactory.getLogger(FetchReader.class); private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle"); + private static final String FETCH_LINE_REGEX = ".*[ \t]*(\\d*|-)[ \t]*.*"; private FetchReader(){ //intentionally left empty @@ -51,14 +52,19 @@ public static List readFetch(final Path fetchFile, final Charset enco long length = 0; URL url = null; while(line != null){ - parts = line.split("\\s+", 3); - final Path path = TagFileReader.createFileFromManifest(bagRootDir, parts[2]); - length = parts[1].equals("-") ? -1 : Long.decode(parts[1]); - url = new URL(parts[0]); - - logger.debug(messages.getString("read_fetch_file_line"), url, length, parts[2], fetchFile); - final FetchItem itemToFetch = new FetchItem(url, length, path); - itemsToFetch.add(itemToFetch); + if(line.matches(FETCH_LINE_REGEX) && !line.matches("\\s*")){ + parts = line.split("\\s+", 3); + final Path path = TagFileReader.createFileFromManifest(bagRootDir, parts[2]); + length = parts[1].equals("-") ? 
-1 : Long.decode(parts[1]); + url = new URL(parts[0]); + + logger.debug(messages.getString("read_fetch_file_line"), url, length, parts[2], fetchFile); + final FetchItem itemToFetch = new FetchItem(url, length, path); + itemsToFetch.add(itemToFetch); + } + else{ + throw new InvalidBagitFileFormatException(messages.getString("invalid_fetch_file_line_error").replace("{}", line)); + } line = reader.readLine(); } diff --git a/src/main/resources/MessageBundle.properties b/src/main/resources/MessageBundle.properties index 5684987ca..db5ab2091 100644 --- a/src/main/resources/MessageBundle.properties +++ b/src/main/resources/MessageBundle.properties @@ -115,6 +115,7 @@ unparsable_version_error=Version must be in format MAJOR.MINOR but was [{}]! #for FetchReader.java reading_fetch_file=Attempting to read [{}]. read_fetch_file_line=Read URL [{}] length [{}] path [{}] from fetch file [{}]. +invalid_fetch_file_line_error=The Line [{}] is invalid for fetch.txt. Each line must take the form of <URL> <LENGTH> <PATH>. #for KeyValueReader.java read_key_value_line=Found key [{}] value [{}] in file [{}] using split regex [{}]. 
diff --git a/src/test/java/gov/loc/repository/bagit/reader/FetchReaderTest.java b/src/test/java/gov/loc/repository/bagit/reader/FetchReaderTest.java index fda948fe2..45db8099f 100644 --- a/src/test/java/gov/loc/repository/bagit/reader/FetchReaderTest.java +++ b/src/test/java/gov/loc/repository/bagit/reader/FetchReaderTest.java @@ -66,6 +66,12 @@ public void testReadFetchWithSizeSpecified() throws Exception{ } } + @Test(expected=InvalidBagitFileFormatException.class) + public void testReadBlankLinesThrowsException() throws Exception{ + Path fetchFile = Paths.get(getClass().getClassLoader().getResource("fetchFiles/fetchWithBlankLines.txt").toURI()); + FetchReader.readFetch(fetchFile, StandardCharsets.UTF_8, Paths.get("/foo")); + } + @Test(expected=InvalidBagitFileFormatException.class) public void testReadWindowsSpecialDirMaliciousFetchThrowsException() throws Exception{ Path fetchFile = Paths.get(getClass().getClassLoader().getResource("maliciousFetchFile/windowsSpecialDirectoryName.txt").toURI()); @@ -92,4 +98,16 @@ public void testReadFileUrlMaliciousFetchThrowsException() throws Exception{ } throw new MaliciousPathException("Skipping for windows cause it isn't valid"); } + + @Test + public void foo(){ + String regex = ".*[ \t]*(\\d*|-)[ \t]*.*"; + String test1 = "http://localhost/foo/data/test2.txt - ~/foo/bar/ham.txt"; + String test2 = "http://localhost/foo/data/dir1/test3.txt 100057 data/dir1/test3.txt"; + String test3 = "http://localhost/foo/data/dir1/test3.txt \t 100057 \t data/dir1/test3.txt"; + + System.err.println(test1.matches(regex)); + System.err.println(test2.matches(regex)); + System.err.println(test3.matches(regex)); + } } diff --git a/src/test/resources/fetchFiles/fetchWithBlankLines.txt b/src/test/resources/fetchFiles/fetchWithBlankLines.txt new file mode 100644 index 000000000..239d9bf40 --- /dev/null +++ b/src/test/resources/fetchFiles/fetchWithBlankLines.txt @@ -0,0 +1,7 @@ +http://localhost/foo/data/dir1/test3.txt 1 data/dir1/test3.txt 
+http://localhost/foo/data/dir2/dir3/test5.txt 2 data/dir2/dir3/test5.txt +http://localhost/foo/data/dir2/test4.txt 3 data/dir2/test4.txt +http://localhost/foo/data/test%201.txt 4 data/test 1.txt +http://localhost/foo/data/test2.txt 5 data/test2.txt + + \ No newline at end of file