From 3e787f8d19037fbcad0a9486dc1eb2a1f6dc1daf Mon Sep 17 00:00:00 2001 From: Ian Rees Date: Tue, 12 May 2020 20:00:03 -0700 Subject: [PATCH] Expose additional copier options to dmfr import (#55) --- copier/copier.go | 2 +- dmfr/import.go | 20 +++++++++++-------- dmfr/import_cmd.go | 49 ++++++++++++++++++++++++---------------------- 3 files changed, 39 insertions(+), 32 deletions(-) diff --git a/copier/copier.go b/copier/copier.go index c7581f69..f65b355b 100644 --- a/copier/copier.go +++ b/copier/copier.go @@ -104,7 +104,7 @@ func NewCopier(reader gotransit.Reader, writer gotransit.Writer) Copier { copier := Copier{ Reader: reader, Writer: writer, - BatchSize: 100000, + BatchSize: 1000000, AllowEntityErrors: false, AllowReferenceErrors: false, InterpolateStopTimes: false, diff --git a/dmfr/import.go b/dmfr/import.go index a633b899..731caf77 100644 --- a/dmfr/import.go +++ b/dmfr/import.go @@ -16,11 +16,13 @@ import ( // ImportOptions sets various options for importing a feed. type ImportOptions struct { - FeedVersionID int - Extensions []string - Directory string - S3 string - Activate bool + FeedVersionID int + Extensions []string + Directory string + S3 string + Activate bool + CreateMissingShapes bool + InterpolateStopTimes bool } // ImportResult contains the results of a feed import. @@ -220,12 +222,14 @@ func ImportFeedVersion(atx gtdb.Adapter, fv gotransit.FeedVersion, opts ImportOp } cp.AddExtension(ext) } - cp.BatchSize = 1000000 + // Settable options + cp.CreateMissingShapes = opts.CreateMissingShapes + cp.InterpolateStopTimes = opts.InterpolateStopTimes + // Non-settable options cp.AllowEntityErrors = false cp.AllowReferenceErrors = false cp.NormalizeServiceIDs = true - cp.CreateMissingShapes = true - cp.InterpolateStopTimes = true + // Go cpresult := cp.Copy() if cpresult == nil { return fvi, errors.New("copy result was nil") diff --git a/dmfr/import_cmd.go b/dmfr/import_cmd.go index 1f2f307a..bbe40e4c 100644 --- a/dmfr/import_cmd.go +++ b/dmfr/import_cmd.go @@ -13,46 +13,47 @@ import ( // ImportCommand imports FeedVersions into a database. type ImportCommand struct { - S3 string - Directory string - Activate bool - Extensions arrayFlags - Workers int - Limit int - DBURL string - CoverDate string - FetchedSince string - Latest bool - DryRun bool - FeedIDs []string - FVIDs arrayFlags - Adapter gtdb.Adapter // allow for mocks + Workers int + Limit int + DBURL string + CoverDate string + FetchedSince string + Latest bool + DryRun bool + FeedIDs []string + FVIDs arrayFlags + Adapter gtdb.Adapter // allow for mocks + ImportOptions ImportOptions } // Parse command line flags func (cmd *ImportCommand) Parse(args []string) error { + extflags := arrayFlags{} fl := flag.NewFlagSet("import", flag.ExitOnError) fl.Usage = func() { fmt.Println("Usage: import [feedids...]") fl.PrintDefaults() } - fl.Var(&cmd.Extensions, "ext", "Include GTFS Extension") + fl.Var(&extflags, "ext", "Include GTFS Extension") fl.Var(&cmd.FVIDs, "fvid", "Import specific feed version ID") fl.IntVar(&cmd.Workers, "workers", 1, "Worker threads") fl.StringVar(&cmd.DBURL, "dburl", "", "Database URL (default: $DMFR_DATABASE_URL)") - fl.StringVar(&cmd.Directory, "gtfsdir", ".", "GTFS Directory") - fl.StringVar(&cmd.S3, "s3", "", "Get GTFS files from S3 bucket/prefix") + fl.StringVar(&cmd.ImportOptions.Directory, "gtfsdir", ".", "GTFS Directory") + fl.StringVar(&cmd.ImportOptions.S3, "s3", "", "Get GTFS files from S3 bucket/prefix") fl.StringVar(&cmd.CoverDate, "date", "", "Service on date") fl.StringVar(&cmd.FetchedSince, "fetched-since", "", "Fetched since") fl.IntVar(&cmd.Limit, "limit", 0, "Import at most n feeds") fl.BoolVar(&cmd.Latest, "latest", false, "Only import latest feed version available for each feed") fl.BoolVar(&cmd.DryRun, "dryrun", false, "Dry run; print feeds that would be imported and exit") - fl.BoolVar(&cmd.Activate, "activate", false, "Set as active feed version after import") + fl.BoolVar(&cmd.ImportOptions.Activate, "activate", false, "Set as active feed version after import") + fl.BoolVar(&cmd.ImportOptions.InterpolateStopTimes, "interpolate-stop-times", false, "Interpolate missing StopTime arrival/departure values") + fl.BoolVar(&cmd.ImportOptions.CreateMissingShapes, "create-missing-shapes", false, "Create missing Shapes from Trip stop-to-stop geometries") fl.Parse(args) cmd.FeedIDs = fl.Args() if cmd.DBURL == "" { cmd.DBURL = os.Getenv("DMFR_DATABASE_URL") } + cmd.ImportOptions.Extensions = extflags return nil } @@ -115,11 +116,13 @@ func (cmd *ImportCommand) Run() error { results := make(chan ImportResult, len(qrs)) for _, fvid := range qrs { jobs <- ImportOptions{ - FeedVersionID: fvid, - Directory: cmd.Directory, - S3: cmd.S3, - Extensions: cmd.Extensions, - Activate: cmd.Activate, + FeedVersionID: fvid, + Directory: cmd.ImportOptions.Directory, + S3: cmd.ImportOptions.S3, + Extensions: cmd.ImportOptions.Extensions, + Activate: cmd.ImportOptions.Activate, + InterpolateStopTimes: cmd.ImportOptions.InterpolateStopTimes, + CreateMissingShapes: cmd.ImportOptions.CreateMissingShapes, } } close(jobs)