From 63c945a27ec9a36452cfc926ebd114f4fd2e44eb Mon Sep 17 00:00:00 2001 From: miles-grant-ibi Date: Tue, 14 May 2024 11:03:19 -0400 Subject: [PATCH 1/3] update tests to cover newline-based edge cases --- .../jobs/ArbitraryTransformJobTest.java | 75 +++++++++++++++++++ .../agency.txt | 2 + .../calendar.txt | 3 + .../feed_info.txt | 2 + .../routes.txt | 3 + .../stop_attributes.txt | 3 + .../stop_times.txt | 5 ++ .../stops.txt | 7 ++ .../trips.txt | 3 + 9 files changed, 103 insertions(+) create mode 100755 src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/agency.txt create mode 100755 src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/calendar.txt create mode 100644 src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/feed_info.txt create mode 100755 src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/routes.txt create mode 100644 src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/stop_attributes.txt create mode 100755 src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/stop_times.txt create mode 100755 src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/stops.txt create mode 100755 src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/trips.txt diff --git a/src/test/java/com/conveyal/datatools/manager/jobs/ArbitraryTransformJobTest.java b/src/test/java/com/conveyal/datatools/manager/jobs/ArbitraryTransformJobTest.java index eb25b619c..40fa2cdba 100644 --- a/src/test/java/com/conveyal/datatools/manager/jobs/ArbitraryTransformJobTest.java +++ b/src/test/java/com/conveyal/datatools/manager/jobs/ArbitraryTransformJobTest.java @@ -216,6 +216,71 @@ void canAppendToStops() throws SQLException, IOException { 1 ); } + + + @Test + void canAppendToStopsWithLeadingNewlineInData() throws SQLException, IOException { + sourceVersion = createFeedVersion( + feedSource, + zipFolderFiles("fake-agency-with-only-calendar-and-trailing-newlines") + ); + FeedTransformation transformation = AppendToFileTransformation.create(generateStopRowWithLeadingNewline(), "stops"); + FeedTransformRules transformRules = new FeedTransformRules(transformation); + feedSource.transformRules.add(transformRules); + Persistence.feedSources.replace(feedSource.id, feedSource); + // Create new target version (note: the folder has no stop_attributes.txt file) + targetVersion = createFeedVersion( + feedSource, + zipFolderFiles("fake-agency-with-only-calendar-dates") + ); + LOG.info("Checking assertions."); + assertEquals( + 5 + 3, // Magic number should match row count of stops.txt with three extra + targetVersion.feedLoadResult.stops.rowCount, + "stops.txt row count should equal input csv data # of rows + 3 extra rows" + ); + // Check for presence of new stop id in database (one record). + assertThatSqlCountQueryYieldsExpectedCount( + String.format( + "SELECT count(*) FROM %s.stops WHERE stop_id = '%s'", + targetVersion.namespace, + "new" + ), + 1 + ); + } + @Test + void canAppendToStopsWithTrailingNewlineInData() throws SQLException, IOException { + sourceVersion = createFeedVersion( + feedSource, + zipFolderFiles("fake-agency-with-only-calendar-and-trailing-newlines") + ); + FeedTransformation transformation = AppendToFileTransformation.create(generateStopRowWithTrailingNewline(), "stops"); + FeedTransformRules transformRules = new FeedTransformRules(transformation); + feedSource.transformRules.add(transformRules); + Persistence.feedSources.replace(feedSource.id, feedSource); + // Create new target version (note: the folder has no stop_attributes.txt file) + targetVersion = createFeedVersion( + feedSource, + zipFolderFiles("fake-agency-with-only-calendar-dates") + ); + LOG.info("Checking assertions."); + assertEquals( + 5 + 3, // Magic number should match row count of stops.txt with three extra + targetVersion.feedLoadResult.stops.rowCount, + "stops.txt row count should equal input csv data # of rows + 3 extra rows" + ); + // Check for presence of new stop id in database (one record). + assertThatSqlCountQueryYieldsExpectedCount( + String.format( + "SELECT count(*) FROM %s.stops WHERE stop_id = '%s'", + targetVersion.namespace, + "new" + ), + 1 + ); + } + @Test void canReplaceFeedInfo() throws SQLException, IOException { // Generate random UUID for feedId, which gets placed into the csv data. @@ -311,6 +376,16 @@ private static String generateStopRow() { "\nnew2,new2,appended stop,,37,-122,,,0,123,," + "\nnew,new,appended stop,,37.06668,-122.07781,,,0,123,,"; } + private static String generateStopRowWithLeadingNewline() { + return "\nnew3,new3,appended stop,,37,-122,,,0,123,," + + "\nnew2,new2,appended stop,,37,-122,,,0,123,," + + "\nnew,new,appended stop,,37.06668,-122.07781,,,0,123,,"; + } + private static String generateStopRowWithTrailingNewline() { + return "new3,new3,appended stop,,37,-122,,,0,123,," + + "\nnew2,new2,appended stop,,37,-122,,,0,123,," + + "\nnew,new,appended stop,,37.06668,-122.07781,,,0,123,,\n"; + } private static String generateCustomCsvData() { return "custom_column1,custom_column2,custom_column3" diff --git a/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/agency.txt b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/agency.txt new file mode 100755 index 000000000..a916ce91b --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/agency.txt @@ -0,0 +1,2 @@ +agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url,agency_branding_url +1,Fake Transit,,,,,America/Los_Angeles,, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/calendar.txt b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/calendar.txt new file mode 100755 index 000000000..0e75afb73 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/calendar.txt @@ -0,0 +1,3 @@ +service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date +common_id,1,1,1,1,1,1,1,20170918,20170920 +only_calendar_id,1,1,1,1,1,1,1,20170921,20170922 diff --git a/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/feed_info.txt b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/feed_info.txt new file mode 100644 index 000000000..ceac60810 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/feed_info.txt @@ -0,0 +1,2 @@ +feed_id,feed_publisher_name,feed_publisher_url,feed_lang,feed_version +fake_transit,Conveyal,http://www.conveyal.com,en,1.0 \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/routes.txt b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/routes.txt new file mode 100755 index 000000000..b13480efa --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/routes.txt @@ -0,0 +1,3 @@ +agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url +1,1,1,Route 1,,3,,7CE6E7,FFFFFF, +1,2,2,Route 2,,3,,7CE6E7,FFFFFF, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/stop_attributes.txt b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/stop_attributes.txt new file mode 100644 index 000000000..b77c473e0 --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/stop_attributes.txt @@ -0,0 +1,3 @@ +stop_id,accessibility_id,cardinal_direction,relative_position,stop_city +4u6g,0,SE,FS,Scotts Valley +johv,0,SE,FS,Scotts Valley diff --git a/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/stop_times.txt b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/stop_times.txt new file mode 100755 index 000000000..646706c5c --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/stop_times.txt @@ -0,0 +1,5 @@ +trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint +only-calendar-trip1,07:00:00,07:00:00,4u6g,1,,0,0,0.0000000, +only-calendar-trip1,07:01:00,07:01:00,johv,2,,0,0,341.4491961, +only-calendar-trip2,07:00:00,07:00:00,johv,1,,0,0,0.0000000, +only-calendar-trip2,07:01:00,07:01:00,4u6g,2,,0,0,341.4491961, \ No newline at end of file diff --git a/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/stops.txt b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/stops.txt new file mode 100755 index 000000000..b3f2e8d9f --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/stops.txt @@ -0,0 +1,7 @@ +stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding +4u6g,4u6g,Butler Ln,,37.0612132,-122.0074332,,,0,,, +johv,johv,Scotts Valley Dr & Victor Sq,,37.0590172,-122.0096058,,,0,,, +123,,Parent Station,,37.0666,-122.0777,,,1,,, + +1234,1234,Child Stop,,37.06662,-122.07772,,,0,123,, +1234567,1234567,Unused stop,,37.06668,-122.07781,,,0,123,, diff --git a/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/trips.txt b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/trips.txt new file mode 100755 index 000000000..387b076cd --- /dev/null +++ b/src/test/resources/com/conveyal/datatools/gtfs/fake-agency-with-only-calendar-and-trailing-newlines/trips.txt @@ -0,0 +1,3 @@ +route_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,service_id +1,only-calendar-trip1,,,0,,,0,0,common_id +2,only-calendar-trip2,,,0,,,0,0,common_id \ No newline at end of file From a4deb2c2cf4f1dd1abf72308d96338d4abb26286 Mon Sep 17 00:00:00 2001 From: miles-grant-ibi Date: Tue, 14 May 2024 11:18:11 -0400 Subject: [PATCH 2/3] correctly handle newlines while appending --- .../transform/AppendToFileTransformation.java | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/AppendToFileTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/AppendToFileTransformation.java index d290afd66..65994582b 100644 --- a/src/main/java/com/conveyal/datatools/manager/models/transform/AppendToFileTransformation.java +++ b/src/main/java/com/conveyal/datatools/manager/models/transform/AppendToFileTransformation.java @@ -5,9 +5,11 @@ import com.conveyal.datatools.manager.models.TableTransformResult; import com.conveyal.datatools.manager.models.TransformType; +import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileOutputStream; +import java.io.FileReader; import java.io.InputStream; import java.io.OutputStream; import java.nio.charset.StandardCharsets; @@ -49,20 +51,40 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs); final File tempFile = File.createTempFile(tableName + "-temp", ".txt"); + final File tempFileWithStrippedNewlines = File.createTempFile(tableName + "-temp-no-newlines", ".txt"); Files.copy(targetTxtFilePath, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING); // Append CSV data into the target file in the temporary copy of file try (OutputStream os = new FileOutputStream(tempFile, true)) { - // Append a newline in case our data doesn't include one - // Having an extra newline is not a problem! os.write(newLineStream.readAllBytes()); os.write(inputStream.readAllBytes()); + os.flush(); + } catch (Exception e) { status.fail("Failed to write to target file", e); } + + // Re-write file without extra line breaks + try ( + OutputStream noNewlineOs = new FileOutputStream(tempFileWithStrippedNewlines, false); + FileReader fr = new FileReader(tempFile); + BufferedReader br = new BufferedReader(fr); + ) { + String line = null; + while ((line = br.readLine()) != null) { + if (line.matches("\n") || line.equals("")) { + continue; + } + + noNewlineOs.write(line.getBytes()); + noNewlineOs.write("\n".getBytes()); + } + noNewlineOs.flush(); + } + // Copy modified file into zip - Files.copy(tempFile.toPath(), targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING); + Files.copy(tempFileWithStrippedNewlines.toPath(), targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING); final int NEW_LINE_CHARACTER_CODE = 10; int lineCount = (int) csvData.chars().filter(c -> c == NEW_LINE_CHARACTER_CODE).count(); From bb5b2daa747dc101e2b24b29647cc082cf2317d9 Mon Sep 17 00:00:00 2001 From: miles-grant-ibi Date: Wed, 15 May 2024 08:39:10 -0400 Subject: [PATCH 3/3] refactor: address pr feedback --- .../models/transform/AppendToFileTransformation.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/AppendToFileTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/AppendToFileTransformation.java index 65994582b..68f7cb9d4 100644 --- a/src/main/java/com/conveyal/datatools/manager/models/transform/AppendToFileTransformation.java +++ b/src/main/java/com/conveyal/datatools/manager/models/transform/AppendToFileTransformation.java @@ -67,13 +67,13 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st // Re-write file without extra line breaks try ( - OutputStream noNewlineOs = new FileOutputStream(tempFileWithStrippedNewlines, false); - FileReader fr = new FileReader(tempFile); - BufferedReader br = new BufferedReader(fr); + OutputStream noNewlineOs = new FileOutputStream(tempFileWithStrippedNewlines, false); + FileReader fr = new FileReader(tempFile); + BufferedReader br = new BufferedReader(fr); ) { - String line = null; + String line; while ((line = br.readLine()) != null) { - if (line.matches("\n") || line.equals("")) { + if (line.matches("\n") || line.isEmpty()) { continue; }