Skip to content

Commit

Permalink
Adjust GenBank download chunks to not surpass today's date
Browse files (browse the repository at this point in the history)
  • Loading branch information
atc3 committed May 23, 2024
1 parent e9ab1f1 commit a33265f
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 6 deletions.
7 changes: 5 additions & 2 deletions workflow_flu_genbank_ingest/Snakefile
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,9 +19,12 @@ static_data_folder = os.path.join("..", config["static_data_folder"])
 min_date = pd.to_datetime(config.get('min_date', '2019-12-01'))
 if min_date is None:
     min_date = '2019-12-01'
-max_date = pd.to_datetime(datetime.date.today().isoformat())
+max_date = pd.to_datetime(
+    config.get('end_date_cutoff',
+        (datetime.date.today() - datetime.timedelta(days=14)).isoformat()
+))
 if max_date is None:
-    max_date = datetime.date.today().isoformat()
+    max_date = (datetime.date.today() - datetime.timedelta(days=14)).isoformat()

 chunks = [d for d in pd.period_range(start=min_date, end=max_date, freq=config.get('dl_chunk_period', 'W'))]
 DL_CHUNKS = [i for i in range(len(chunks))]
Expand Down
7 changes: 5 additions & 2 deletions workflow_rsv_genbank_ingest/Snakefile
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,9 +19,12 @@ static_data_folder = os.path.join("..", config["static_data_folder"])
 min_date = pd.to_datetime(config.get('min_date', '2019-12-01'))
 if min_date is None:
     min_date = '2019-12-01'
-max_date = pd.to_datetime(datetime.date.today().isoformat())
+max_date = pd.to_datetime(
+    config.get('end_date_cutoff',
+        (datetime.date.today() - datetime.timedelta(days=14)).isoformat()
+))
 if max_date is None:
-    max_date = datetime.date.today().isoformat()
+    max_date = (datetime.date.today() - datetime.timedelta(days=14)).isoformat()

 chunks = [d for d in pd.period_range(start=min_date, end=max_date, freq=config.get('dl_chunk_period', 'W'))]
 DL_CHUNKS = [i for i in range(len(chunks))]
Expand Down
8 changes: 6 additions & 2 deletions workflow_sars2_genbank_ingest/Snakefile
Original file line number | Diff line number | Diff line change
Expand Up @@ -34,13 +34,17 @@ rule all:
 min_date = pd.to_datetime(config.get('min_date', '2019-12-01'))
 if min_date is None:
     min_date = '2019-12-01'
-max_date = pd.to_datetime(config.get('end_date_cutoff', datetime.date.today().isoformat()))
+max_date = pd.to_datetime(
+    config.get('end_date_cutoff',
+        (datetime.date.today() - datetime.timedelta(days=14)).isoformat()
+))
 if max_date is None:
-    max_date = datetime.date.today().isoformat()
+    max_date = (datetime.date.today() - datetime.timedelta(days=14)).isoformat()

 chunks = [d for d in pd.period_range(start=min_date, end=max_date, freq=config.get('dl_chunk_period', 'W'))]
 DL_CHUNKS = [i for i in range(len(chunks))]

+
 rule download_metadata_chunk:
     """Download the data feed in chunks, to avoid timeouts.
     """
Expand Down

0 comments on commit a33265f

Please sign in to comment.