From 36a845952b977a11eca5a6133afd780d6476a335 Mon Sep 17 00:00:00 2001 From: Evan Siroky Date: Sat, 8 Aug 2020 01:04:01 -0700 Subject: [PATCH 1/4] Add agencyId to RouteShort model --- .../java/org/opentripplanner/index/model/RouteShort.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/opentripplanner/index/model/RouteShort.java b/src/main/java/org/opentripplanner/index/model/RouteShort.java index 69af010a5db..78775024e05 100644 --- a/src/main/java/org/opentripplanner/index/model/RouteShort.java +++ b/src/main/java/org/opentripplanner/index/model/RouteShort.java @@ -3,6 +3,7 @@ import java.util.Collection; import java.util.List; +import org.opentripplanner.model.Agency; import org.opentripplanner.model.FeedScopedId; import org.opentripplanner.model.Route; import org.opentripplanner.gtfs.GtfsLibrary; @@ -16,6 +17,7 @@ public class RouteShort { public String longName; public String mode; public String color; + public String agencyId; public String agencyName; public Integer sortOrder; @@ -25,7 +27,9 @@ public RouteShort (Route route) { longName = route.getLongName(); mode = GtfsLibrary.getTraverseMode(route).toString(); color = route.getColor(); - agencyName = route.getAgency().getName(); + Agency agency = route.getAgency(); + agencyId = agency.getId(); + agencyName = agency.getName(); sortOrder = route.getSortOrder(); } From a59155535d667d18aed6da9f9b5158c5792cec81 Mon Sep 17 00:00:00 2001 From: Evan Siroky Date: Wed, 26 Aug 2020 18:33:40 -0700 Subject: [PATCH 2/4] Better handle feed and agency IDs Refs https://github.com/opentripplanner/OpenTripPlanner/issues/2838 --- .../graph_builder/module/GtfsFeedId.java | 31 ++++++- .../graph_builder/module/GtfsModule.java | 92 ++++++++++++++++++- .../java/org/opentripplanner/GtfsTest.java | 2 +- .../module/GtfsGraphBuilderModuleTest.java | 2 +- .../factory/PatternHopFactoryTest.java | 2 +- 5 files changed, 123 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/opentripplanner/graph_builder/module/GtfsFeedId.java b/src/main/java/org/opentripplanner/graph_builder/module/GtfsFeedId.java index a9afc24b032..21546b9b566 100644 --- a/src/main/java/org/opentripplanner/graph_builder/module/GtfsFeedId.java +++ b/src/main/java/org/opentripplanner/graph_builder/module/GtfsFeedId.java @@ -6,6 +6,8 @@ import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; +import java.util.HashSet; +import java.util.Set; /** * Represent a feed id in a GTFS feed. @@ -16,11 +18,21 @@ public class GtfsFeedId { */ private static int FEED_ID_COUNTER = 1; + /** + * A Map that keeps track of which feed ids were generated to make sure there are no duplicates + */ + private static final Set seenIds = new HashSet<>(); + /** * The id for the feed */ private final String id; + /** + * Whether the ID was autogenerated using the FEED_ID_COUNTER. + */ + private final boolean autoGenerated; + /** * Constructs a new feed id. * @@ -28,14 +40,18 @@ public class GtfsFeedId { * * @param id The feed id */ - private GtfsFeedId(String id) { + private GtfsFeedId(String id, boolean autoGenerated) { this.id = id; + this.autoGenerated = autoGenerated; } public String getId() { return id; } + public boolean isAutoGenerated() { + return autoGenerated; + } public static class Builder { @@ -99,12 +115,23 @@ protected String cleanId(String id) { * @return A GtfsFeedId */ public GtfsFeedId build() { + boolean autoGenerated = false; id = cleanId(id); if (id == null || id.trim().length() == 0) { id = String.valueOf(FEED_ID_COUNTER); + autoGenerated = true; + } + if (seenIds.contains(id)) { + throw new RuntimeException( + String.format( + "A duplicate Feed ID of `%s` was encountered when loading GTFS data!", + id + ) + ); } + seenIds.add(id); FEED_ID_COUNTER++; - return new GtfsFeedId(id); + return new GtfsFeedId(id, autoGenerated); } } } diff --git a/src/main/java/org/opentripplanner/graph_builder/module/GtfsModule.java b/src/main/java/org/opentripplanner/graph_builder/module/GtfsModule.java index 7d64f386486..74aaf913d4b 100644 --- a/src/main/java/org/opentripplanner/graph_builder/module/GtfsModule.java +++ b/src/main/java/org/opentripplanner/graph_builder/module/GtfsModule.java @@ -3,10 +3,13 @@ import java.awt.Color; import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.io.Serializable; +import java.nio.charset.StandardCharsets; import java.util.*; import java.util.stream.Collectors; +import com.csvreader.CsvReader; import org.onebusaway.csv_entities.EntityHandler; import org.onebusaway.gtfs.impl.GtfsRelationalDaoImpl; import org.onebusaway.gtfs.model.Agency; @@ -63,6 +66,23 @@ public class GtfsModule implements GraphBuilderModule { private List gtfsBundles; + /** + * A Set of bundleFilenames that had an auto-generated Feed ID. This helps keep track of whether it is possible to + * generate deterministic Feed IDs with this set of bundles. + */ + private final Set bundleFilenamesWithAutoGeneratedFeedId = new HashSet<>(); + + /** + * A Set to keep track of which gtfsFeedIds were auto-generated. + */ + private final Set autoGeneratedFeedIds = new HashSet<>(); + + /** + * A Set that keeps track of which agencies had auto-generated IDs. The values in here consist of the strings in the + * template `{FeedID}{AgencyID}`. + */ + private final Set autoGeneratedAgencyIds = new HashSet<>(); + public Boolean getUseCached() { return useCached; } @@ -165,6 +185,32 @@ public void buildGraph(Graph graph, GraphBuilderModuleSummary graphBuilderModule graph.hasTransit = true; graph.calculateTransitCenter(); + + // log information about the feed and agency IDs now present in the graph + LOG.info("Loaded the following feeds and agencies into the graph:"); + for (String feedId : graph.getFeedIds()) { + Collection feedAgencies = graph.getAgencies(feedId); + if (feedAgencies.size() > 0) { + LOG.info( + "Feed with ID `{}`{}", + feedId, + autoGeneratedFeedIds.contains(feedId) + ? " (ID was auto-generated)" + : "" + ); + for (org.opentripplanner.model.Agency feedAgency : feedAgencies) { + LOG.info( + " - Agency `{}` with ID `{}`{}", + feedAgency.getName(), + feedAgency.getId(), + autoGeneratedAgencyIds.contains(feedId + feedAgency.getId()) + ? " (ID was auto-generated)" + : "" + ); + } + } + } + LOG.info(postBundleTask.finish()); } @@ -181,6 +227,38 @@ private GtfsMutableRelationalDao loadBundle(GtfsBundle gtfsBundle) GtfsFeedId gtfsFeedId = gtfsBundle.getFeedId(); + // check for duplicate bundle names and make sure that at most one of those bundles contains an auto-generated + // feed ID. If there is more than one, non-deterministic Feed IDs could be generated. + String bundleFilename = gtfsBundle.getPath().getName(); + if (bundleFilenamesWithAutoGeneratedFeedId.contains(bundleFilename)) { + throw new RuntimeException( + String.format( + "There are at least 2 bundles with the name `%s` that lack a feed_id value in a record in the feed_info.txt table. This will result in non-deterministic Feed ID generation!", + bundleFilename + ) + ); + } + if (gtfsFeedId.isAutoGenerated()) { + bundleFilenamesWithAutoGeneratedFeedId.add(bundleFilename); + autoGeneratedFeedIds.add(gtfsFeedId.getId()); + } + + // read the agency table to determine if there is a single agency entry without an agency ID. This is used later + // to determine whether an agency ID was auto-generated. + boolean agencyFileContainedOneAgencyWithoutId = false; + InputStream agencyInputStream = gtfsBundle.getCsvInputSource().getResource("agency.txt"); + try { + CsvReader result = new CsvReader(agencyInputStream, StandardCharsets.UTF_8); + result.readHeaders(); + result.readRecord(); + String firstAgencyId = result.get("agency_id"); + if (firstAgencyId == null || firstAgencyId.trim().length() == 0) { + agencyFileContainedOneAgencyWithoutId = true; + } + } finally { + agencyInputStream.close(); + } + GtfsReader reader = new GtfsReader(); reader.setInputSource(gtfsBundle.getCsvInputSource()); reader.setEntityStore(store); @@ -204,6 +282,8 @@ private GtfsMutableRelationalDao loadBundle(GtfsBundle gtfsBundle) for (Agency agency : reader.getAgencies()) { String agencyId = agency.getId(); LOG.info("This Agency has the ID {}", agencyId); + boolean agencyIdIsAutogenerated = false; + // Make sure the combination of the FeedId and agencyId is unique // Somehow, when the agency's id field is missing, OBA replaces it with the agency's name. // TODO Figure out how and why this is happening. if (agencyId == null || agencyIdsSeen.contains(gtfsFeedId.getId() + agencyId)) { @@ -217,8 +297,18 @@ private GtfsMutableRelationalDao loadBundle(GtfsBundle gtfsBundle) reader.addAgencyIdMapping(agencyId, generatedAgencyId); // NULL key should work agency.setId(generatedAgencyId); agencyId = generatedAgencyId; + agencyIdIsAutogenerated = true; + } else { + // the agencyId was not null or was non-conflicting. This means that either the agencyId was + // derived from a record in the agency.txt table, or it was assigned the default AgencyId which + // is the feedID of this feed. + agencyIdIsAutogenerated = agencyFileContainedOneAgencyWithoutId; + } + String gtfsFeedIdAndAgencyId = gtfsFeedId.getId() + agencyId; + agencyIdsSeen.add(gtfsFeedIdAndAgencyId); + if (agencyIdIsAutogenerated) { + autoGeneratedAgencyIds.add(gtfsFeedIdAndAgencyId); } - if (agencyId != null) agencyIdsSeen.add(gtfsFeedId.getId() + agencyId); } } } diff --git a/src/test/java/org/opentripplanner/GtfsTest.java b/src/test/java/org/opentripplanner/GtfsTest.java index 7311f0a56ad..a997d973b79 100644 --- a/src/test/java/org/opentripplanner/GtfsTest.java +++ b/src/test/java/org/opentripplanner/GtfsTest.java @@ -56,7 +56,7 @@ protected void setUp() { File gtfs = new File("src/test/resources/" + getFeedName()); File gtfsRealTime = new File("src/test/resources/" + getFeedName() + ".pb"); GtfsBundle gtfsBundle = new GtfsBundle(gtfs); - feedId = new GtfsFeedId.Builder().id("FEED").build(); + feedId = new GtfsFeedId.Builder().id("").build(); gtfsBundle.setFeedId(feedId); List gtfsBundleList = Collections.singletonList(gtfsBundle); GtfsModule gtfsGraphBuilderImpl = new GtfsModule(gtfsBundleList); diff --git a/src/test/java/org/opentripplanner/graph_builder/module/GtfsGraphBuilderModuleTest.java b/src/test/java/org/opentripplanner/graph_builder/module/GtfsGraphBuilderModuleTest.java index ffd7539eaf5..7808bb533bc 100644 --- a/src/test/java/org/opentripplanner/graph_builder/module/GtfsGraphBuilderModuleTest.java +++ b/src/test/java/org/opentripplanner/graph_builder/module/GtfsGraphBuilderModuleTest.java @@ -91,7 +91,7 @@ private MockGtfs getSimpleGtfs() throws IOException { private static List getGtfsAsBundleList (MockGtfs gtfs) { GtfsBundle bundle = new GtfsBundle(); - bundle.setFeedId(new GtfsFeedId.Builder().id("FEED").build()); + bundle.setFeedId(new GtfsFeedId.Builder().id("").build()); bundle.setPath(gtfs.getPath()); List list = Lists.newArrayList(); list.add(bundle); diff --git a/src/test/java/org/opentripplanner/routing/edgetype/factory/PatternHopFactoryTest.java b/src/test/java/org/opentripplanner/routing/edgetype/factory/PatternHopFactoryTest.java index 377f1feb0aa..ec20dbf66f6 100644 --- a/src/test/java/org/opentripplanner/routing/edgetype/factory/PatternHopFactoryTest.java +++ b/src/test/java/org/opentripplanner/routing/edgetype/factory/PatternHopFactoryTest.java @@ -26,7 +26,7 @@ public void testBikesAllowed() throws IOException { gtfs.putLines("frequencies.txt", "trip_id,start_time,end_time,headway_secs", "t0,09:00:00,17:00:00,300"); - GtfsFeedId feedId = new GtfsFeedId.Builder().id("FEED").build(); + GtfsFeedId feedId = new GtfsFeedId.Builder().id("").build(); PatternHopFactory factory = new PatternHopFactory( GtfsLibrary.createContext(feedId, gtfs.read()) ); From ee5cd0acb7b4d68afd8496dabcaa49bae6c3d62e Mon Sep 17 00:00:00 2001 From: Evan Siroky Date: Tue, 1 Sep 2020 18:04:16 -0700 Subject: [PATCH 3/4] Add some clarification comments --- .../opentripplanner/graph_builder/module/GtfsFeedId.java | 3 ++- .../opentripplanner/graph_builder/module/GtfsModule.java | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/opentripplanner/graph_builder/module/GtfsFeedId.java b/src/main/java/org/opentripplanner/graph_builder/module/GtfsFeedId.java index 21546b9b566..dd930d7190c 100644 --- a/src/main/java/org/opentripplanner/graph_builder/module/GtfsFeedId.java +++ b/src/main/java/org/opentripplanner/graph_builder/module/GtfsFeedId.java @@ -19,7 +19,8 @@ public class GtfsFeedId { private static int FEED_ID_COUNTER = 1; /** - * A Map that keeps track of which feed ids were generated to make sure there are no duplicates + * A Map that keeps track of which feed ids were generated in the {@link Builder#build()} method to make sure there + * are no duplicates. */ private static final Set seenIds = new HashSet<>(); diff --git a/src/main/java/org/opentripplanner/graph_builder/module/GtfsModule.java b/src/main/java/org/opentripplanner/graph_builder/module/GtfsModule.java index 74aaf913d4b..0e91494232b 100644 --- a/src/main/java/org/opentripplanner/graph_builder/module/GtfsModule.java +++ b/src/main/java/org/opentripplanner/graph_builder/module/GtfsModule.java @@ -243,8 +243,9 @@ private GtfsMutableRelationalDao loadBundle(GtfsBundle gtfsBundle) autoGeneratedFeedIds.add(gtfsFeedId.getId()); } - // read the agency table to determine if there is a single agency entry without an agency ID. This is used later - // to determine whether an agency ID was auto-generated. + // Read the agency table to determine if there is a single agency entry without an agency ID. This is used later + // to determine whether an agency ID was auto-generated. According to the GTFS spec, if there are multiple + // agencies, then they must have an agency ID. Therefore, it is only necessary to read the first record. boolean agencyFileContainedOneAgencyWithoutId = false; InputStream agencyInputStream = gtfsBundle.getCsvInputSource().getResource("agency.txt"); try { @@ -301,7 +302,8 @@ private GtfsMutableRelationalDao loadBundle(GtfsBundle gtfsBundle) } else { // the agencyId was not null or was non-conflicting. This means that either the agencyId was // derived from a record in the agency.txt table, or it was assigned the default AgencyId which - // is the feedID of this feed. + // is the feedID of this feed. Therefore, set whether the agencyID was auto-generated based on + // whether the agency.txt had a single agency record without an agency_id. agencyIdIsAutogenerated = agencyFileContainedOneAgencyWithoutId; } String gtfsFeedIdAndAgencyId = gtfsFeedId.getId() + agencyId; From df70dee232e803f1182d6b764ba3f4ea6e534522 Mon Sep 17 00:00:00 2001 From: Evan Siroky Date: Thu, 3 Sep 2020 17:34:23 -0700 Subject: [PATCH 4/4] Add some more comments and a test suite --- .../graph_builder/module/GtfsFeedId.java | 8 ++++--- .../graph_builder/module/GtfsModule.java | 6 ++++-- .../graph_builder/module/GtfsFeedIdTest.java | 21 +++++++++++++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) create mode 100644 src/test/java/org/opentripplanner/graph_builder/module/GtfsFeedIdTest.java diff --git a/src/main/java/org/opentripplanner/graph_builder/module/GtfsFeedId.java b/src/main/java/org/opentripplanner/graph_builder/module/GtfsFeedId.java index dd930d7190c..a9522892e43 100644 --- a/src/main/java/org/opentripplanner/graph_builder/module/GtfsFeedId.java +++ b/src/main/java/org/opentripplanner/graph_builder/module/GtfsFeedId.java @@ -19,7 +19,7 @@ public class GtfsFeedId { private static int FEED_ID_COUNTER = 1; /** - * A Map that keeps track of which feed ids were generated in the {@link Builder#build()} method to make sure there + * A Set that keeps track of which feed ids were generated in the {@link Builder#build()} method to make sure there * are no duplicates. */ private static final Set seenIds = new HashSet<>(); @@ -118,11 +118,14 @@ protected String cleanId(String id) { public GtfsFeedId build() { boolean autoGenerated = false; id = cleanId(id); + // auto-generate a new ID if it is null or a zero-length string if (id == null || id.trim().length() == 0) { id = String.valueOf(FEED_ID_COUNTER); autoGenerated = true; } - if (seenIds.contains(id)) { + // add the feed ID to the set of IDs seen so far. If the set already contained the ID, throw an error in + // order to avoid using duplicate feed IDs. + if (!seenIds.add(id)) { throw new RuntimeException( String.format( "A duplicate Feed ID of `%s` was encountered when loading GTFS data!", @@ -130,7 +133,6 @@ public GtfsFeedId build() { ) ); } - seenIds.add(id); FEED_ID_COUNTER++; return new GtfsFeedId(id, autoGenerated); } diff --git a/src/main/java/org/opentripplanner/graph_builder/module/GtfsModule.java b/src/main/java/org/opentripplanner/graph_builder/module/GtfsModule.java index 0e91494232b..86fe4862763 100644 --- a/src/main/java/org/opentripplanner/graph_builder/module/GtfsModule.java +++ b/src/main/java/org/opentripplanner/graph_builder/module/GtfsModule.java @@ -233,7 +233,7 @@ private GtfsMutableRelationalDao loadBundle(GtfsBundle gtfsBundle) if (bundleFilenamesWithAutoGeneratedFeedId.contains(bundleFilename)) { throw new RuntimeException( String.format( - "There are at least 2 bundles with the name `%s` that lack a feed_id value in a record in the feed_info.txt table. This will result in non-deterministic Feed ID generation!", + "There are at least 2 GTFS bundles with the name `%s` that lack a feed_id value in a record in the feed_info.txt table. This will result in non-deterministic Feed ID generation!", bundleFilename ) ); @@ -244,7 +244,8 @@ private GtfsMutableRelationalDao loadBundle(GtfsBundle gtfsBundle) } // Read the agency table to determine if there is a single agency entry without an agency ID. This is used later - // to determine whether an agency ID was auto-generated. According to the GTFS spec, if there are multiple + // to determine whether an agency ID was auto-generated (it will be auto-generated to the bundle's feed ID if + // the agency ID is missing in a single-agency feed). According to the GTFS spec, if there are multiple // agencies, then they must have an agency ID. Therefore, it is only necessary to read the first record. boolean agencyFileContainedOneAgencyWithoutId = false; InputStream agencyInputStream = gtfsBundle.getCsvInputSource().getResource("agency.txt"); @@ -264,6 +265,7 @@ private GtfsMutableRelationalDao loadBundle(GtfsBundle gtfsBundle) reader.setInputSource(gtfsBundle.getCsvInputSource()); reader.setEntityStore(store); reader.setInternStrings(true); + // Set the default Agency ID to be the Feed ID of this bundle. reader.setDefaultAgencyId(gtfsFeedId.getId()); if (LOG.isDebugEnabled()) diff --git a/src/test/java/org/opentripplanner/graph_builder/module/GtfsFeedIdTest.java b/src/test/java/org/opentripplanner/graph_builder/module/GtfsFeedIdTest.java new file mode 100644 index 00000000000..26d98bb306b --- /dev/null +++ b/src/test/java/org/opentripplanner/graph_builder/module/GtfsFeedIdTest.java @@ -0,0 +1,21 @@ +package org.opentripplanner.graph_builder.module; + +import org.junit.Test; + +public class GtfsFeedIdTest { + @Test + public void shouldUseProvidedFeedId() { + assert(new GtfsFeedId.Builder().id("abcd").build().getId().equals("abcd")); + } + + @Test + public void canAutoGenerateFeedId() { + assert(Integer.valueOf(new GtfsFeedId.Builder().id("").build().getId()) > 0); + } + + @Test(expected = RuntimeException.class) + public void throwsErrorOnDuplicateFeedId() { + new GtfsFeedId.Builder().id("abc123").build(); + new GtfsFeedId.Builder().id("abc123").build(); + } +} \ No newline at end of file