# Patch summary (commentary only; it precedes the first `diff --git` header).
#
# * pyproject.toml: adds "types-tqdm" to the `test` dependency list.
# * src/aross_stations_db/cli.py: imports `generate_event_object` instead of
#   `generate_event_objects`; `init` now builds `events` with a list
#   comprehension that calls `generate_event_object` once per item yielded by
#   `tqdm(raw_events, desc="Reading events")`, and gains a TODO comment about
#   a progress bar for the load step.
# * src/aross_stations_db/db/setup.py: drops the `Iterator` import and replaces
#   `generate_event_objects` (iterator of raw dicts -> list[Event]) with
#   `generate_event_object` (one raw dict -> one Event), reading the same keys:
#   station_id, start, end, sog, RA, FZRA, SOLID, UP.
#
# NOTE(review): line breaks and leading indentation were restored from a
# whitespace-collapsed copy of this patch. Hunk line counts match the `@@`
# headers, but indentation of context/removed lines is reconstructed --
# verify against the repository (or apply with whitespace tolerance).
# NOTE(review): "feadback" in the cli.py hunk is on a context line, i.e.
# pre-existing file text; it must stay as-is for the hunk to match.
diff --git a/pyproject.toml b/pyproject.toml
index 19eacbe..83b0029 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,6 +42,7 @@ test = [
   "pytest >=6",
   "pytest-cov >=3",
   "mypy >=1.10",
+  "types-tqdm",
 ]
 dev = [
   "pytest >=6",
diff --git a/src/aross_stations_db/cli.py b/src/aross_stations_db/cli.py
index d54abed..5e0ffdb 100644
--- a/src/aross_stations_db/cli.py
+++ b/src/aross_stations_db/cli.py
@@ -5,7 +5,7 @@
 
 from aross_stations_db.config import CliLoadSettings
 from aross_stations_db.db.setup import (
-    generate_event_objects,
+    generate_event_object,
     load_events,
     load_stations,
     recreate_tables,
@@ -52,10 +52,11 @@ def init(skip_load: bool = False) -> None:
         # The event processing steps are split into stages to provide better feadback at
         # runtime. On slower systems, it can be unclear what the bottleneck is. In the
         # long run, we should try to optimize this after learning more.
-        events = generate_event_objects(
-            tqdm(raw_events, desc="Reading events"),
-        )
+        events = [
+            generate_event_object(e) for e in tqdm(raw_events, desc="Reading events")
+        ]
 
+        # TODO: Is there any way we can monitor this process with a progress bar?
         logger.info("Loading events; this can take a minute or so")
         load_events(events, session=db_session)
         logger.info("Loaded events")
diff --git a/src/aross_stations_db/db/setup.py b/src/aross_stations_db/db/setup.py
index 732e360..5bb91ab 100644
--- a/src/aross_stations_db/db/setup.py
+++ b/src/aross_stations_db/db/setup.py
@@ -1,5 +1,4 @@
 import datetime as dt
-from collections.abc import Iterator
 
 from sqlalchemy import MetaData, insert
 from sqlalchemy.orm import Session
@@ -65,20 +64,17 @@ def load_stations(stations: list[dict[str, str]], *, session: Session) -> None:
     session.commit()
 
 
-def generate_event_objects(raw_events: Iterator[dict[str, str]]) -> list[Event]:
-    return [
-        Event(
-            station_id=event["station_id"],
-            time_start=dt.datetime.fromisoformat(event["start"]),
-            time_end=dt.datetime.fromisoformat(event["end"]),
-            snow_on_ground=_snow_on_ground_status(event["sog"]),
-            rain_hours=int(event["RA"]),
-            freezing_rain_hours=int(event["FZRA"]),
-            solid_precipitation_hours=int(event["SOLID"]),
-            unknown_precipitation_hours=int(event["UP"]),
-        )
-        for event in raw_events
-    ]
+def generate_event_object(raw_event: dict[str, str]) -> Event:
+    return Event(
+        station_id=raw_event["station_id"],
+        time_start=dt.datetime.fromisoformat(raw_event["start"]),
+        time_end=dt.datetime.fromisoformat(raw_event["end"]),
+        snow_on_ground=_snow_on_ground_status(raw_event["sog"]),
+        rain_hours=int(raw_event["RA"]),
+        freezing_rain_hours=int(raw_event["FZRA"]),
+        solid_precipitation_hours=int(raw_event["SOLID"]),
+        unknown_precipitation_hours=int(raw_event["UP"]),
+    )
 
 
 def load_events(events: list[Event], *, session: Session) -> None: