diff --git a/ingest/Snakefile b/ingest/Snakefile index fb26cc1..8c916ce 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -54,7 +54,7 @@ rule all: _get_all_targets, -include: "rules/fetch_sequences.smk" +include: "rules/fetch_from_ncbi.smk" include: "rules/transform.smk" diff --git a/ingest/rules/fetch_sequences.smk b/ingest/rules/fetch_from_ncbi.smk similarity index 94% rename from ingest/rules/fetch_sequences.smk rename to ingest/rules/fetch_from_ncbi.smk index 2fef4b1..c775819 100644 --- a/ingest/rules/fetch_sequences.smk +++ b/ingest/rules/fetch_from_ncbi.smk @@ -7,6 +7,10 @@ defined in the config. If adding other sources, add a new rule upstream of rule `fetch_all_sequences` to create the file `data/{source}.ndjson` or the file must exist as a static file in the repo. +Fetch with NCBI Datasets (https://www.ncbi.nlm.nih.gov/datasets/) + - requires `ncbi_taxon_id` config + - Only returns metadata fields that are available through NCBI Datasets + Produces final output as sequences_ndjson = "data/sequences.ndjson"