Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More event columns #8

Merged
merged 4 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ RUN apk add git build-base musl-dev linux-headers
WORKDIR /app
ADD . .

RUN pip install --editable ".[dev,test,docs,ui]"
RUN pip install --editable ".[ui]"

ENTRYPOINT ["aross-stations-db"]
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,9 @@ Now, you can use Adminer's SQL Query menu to select some data:
<details>
<summary>Example SQL query</summary>

This query returns 13 results at the time of this writing, but it may return more at a
future time.

```sql
select event.*
from event
Expand All @@ -183,6 +186,8 @@ where
)
AND event.time_start > '2023-01-01'::date
AND event.time_end < '2023-06-01'::date
AND event.snow_on_ground
AND event.rain_hours >= 1
;
```
</details>
Expand Down Expand Up @@ -236,7 +241,9 @@ docker compose down

##### Database

Remove the `_db/` directory to start over with a fresh database.
There is no need to remove the `_data/` directory to start over with a fresh database; the
`init` CLI command will do that for you! However, if you want to completely remove the
database to save space on your system, you may want to delete the `_data/` directory.


##### Containers and images
Expand Down
6 changes: 3 additions & 3 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@

@nox.session
def typecheck(session: nox.Session) -> None:
session.install(".[test]")
session.install("--editable", ".[test]")
session.run("mypy")


@nox.session
def test(session: nox.Session) -> None:
"""Run the unit and regular tests."""
session.install(".[test]")
session.install("--editable", ".[test]")
session.run("pytest", *session.posargs)


Expand All @@ -45,7 +45,7 @@ def build_docs(session: nox.Session) -> None:

extra_installs = ["sphinx-autobuild"] if args.serve else []

session.install("-e.[docs]", *extra_installs)
session.install("--editable", ".[docs]", *extra_installs)
session.chdir("docs")

if args.builder == "linkcheck":
Expand Down
2 changes: 1 addition & 1 deletion src/aross_stations_db/api/v1/climatology.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
ClimatologyJsonElement,
climatology_query_results_to_json,
)
from aross_stations_db.query import climatology_query
from aross_stations_db.db.query import climatology_query

router = APIRouter()

Expand Down
2 changes: 1 addition & 1 deletion src/aross_stations_db/api/v1/stations.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
StationsGeoJson,
stations_query_results_to_geojson,
)
from aross_stations_db.query import stations_query
from aross_stations_db.db.query import stations_query

router = APIRouter()

Expand Down
2 changes: 1 addition & 1 deletion src/aross_stations_db/api/v1/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
TimeseriesJsonElement,
timeseries_query_results_to_json,
)
from aross_stations_db.query import timeseries_query
from aross_stations_db.db.query import timeseries_query

router = APIRouter()

Expand Down
10 changes: 5 additions & 5 deletions src/aross_stations_db/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
from sqlalchemy.orm import Session

from aross_stations_db.config import CliLoadSettings, Settings
from aross_stations_db.db import (
create_tables,
from aross_stations_db.db.setup import (
load_events,
load_stations,
recreate_tables,
)
from aross_stations_db.source_data import (
get_events,
Expand All @@ -21,15 +21,15 @@ def cli() -> None:

@cli.command
def init() -> None:
"""Create the database tables."""
"""Create the database tables, dropping any that pre-exist."""
# TODO: False-positive. Remove type-ignore.
# See: https://github.com/pydantic/pydantic/issues/6713
config = Settings() # type:ignore[call-arg]

with Session(config.db_engine) as db_session:
create_tables(db_session)
recreate_tables(db_session)

logger.success("Tables created")
logger.success("Database initialized")


@cli.command
Expand Down
50 changes: 0 additions & 50 deletions src/aross_stations_db/db/__init__.py
Original file line number Diff line number Diff line change
@@ -1,50 +0,0 @@
import datetime as dt
from collections.abc import Iterator

from sqlalchemy.orm import Session

from aross_stations_db.db.tables import Base, Event, Station


def create_tables(session: Session) -> None:
    """Create every table defined on the declarative ``Base``.

    IMPORTANT: This data is purely derived and can be reloaded in a reasonable
    amount of time, so we never drop tables or migrate data here — whenever the
    structure changes, we simply start over with a fresh database.
    """
    bind = session.get_bind()
    Base.metadata.create_all(bind)


def load_stations(stations: list[dict[str, str]], *, session: Session) -> None:
    """Insert one ``Station`` row per source record, then commit."""
    records = []
    for record in stations:
        records.append(
            Station(
                id=record["stid"],
                name=record["station_name"],
                country_code=record["country"],
                location=_station_location_wkt(record),
            )
        )
    session.add_all(records)
    session.commit()


def load_events(events: Iterator[dict[str, str]], *, session: Session) -> None:
    """Insert one ``Event`` row per source record, then commit."""
    rows = []
    for record in events:
        rows.append(
            Event(
                station_id=record["station_id"],
                time_start=dt.datetime.fromisoformat(record["start"]),
                time_end=dt.datetime.fromisoformat(record["end"]),
            )
        )
    session.add_all(rows)
    session.commit()


def _station_location_wkt(station: dict[str, str]) -> str:
return f"SRID=4326;POINT({station['longitude']} {station['latitude']})"
File renamed without changes.
90 changes: 90 additions & 0 deletions src/aross_stations_db/db/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import datetime as dt
from collections.abc import Iterator

from sqlalchemy import MetaData
from sqlalchemy.orm import Session

from aross_stations_db.db.tables import Base, Event, Station


def _tables_to_drop(session: Session) -> MetaData:
    """Reflect the tables currently in the database so they can be dropped.

    We reflect what actually exists (rather than relying on our model metadata)
    because we want a clean slate even when table names have changed between
    releases, without dealing with migrations or asking the user to delete the
    database files by hand.

    WARNING: This function is fragile! If extensions other than PostGIS are
    installed, or PostGIS changes its use of system tables, or this is deployed
    using a different database than the official PostGIS docker image, it may
    not work!
    """

    def _not_postgis_owned(table_name: str, _: object) -> bool:
        # "spatial_ref_sys" belongs to the PostGIS extension; never drop it.
        return table_name != "spatial_ref_sys"

    # NOTE: "public" is the default schema. Because we're using the
    # out-of-the-box postgres container image config, this works.
    existing = MetaData(schema="public")
    existing.reflect(bind=session.get_bind(), only=_not_postgis_owned)
    return existing


def recreate_tables(session: Session) -> None:
    """Drop any pre-existing application tables, then create all tables."""
    bind = session.get_bind()
    _tables_to_drop(session).drop_all(bind=bind)
    Base.metadata.create_all(bind)
    session.commit()


def load_stations(stations: list[dict[str, str]], *, session: Session) -> None:
    """Insert one ``Station`` row per source record, then commit."""
    records = []
    for record in stations:
        records.append(
            Station(
                id=record["stid"],
                name=record["station_name"],
                country_code=record["country"],
                # HACK: Passing a string for location is "wrong" here, but it's
                # working. Something is being handled implicitly to convert the
                # string to binary (WKB).
                location=_station_location_wkt(record),  # type: ignore[arg-type]
            )
        )
    session.add_all(records)
    session.commit()


def load_events(events: Iterator[dict[str, str]], *, session: Session) -> None:
    """Insert one ``Event`` row per source record, then commit."""

    def _to_event(record: dict[str, str]) -> Event:
        # One ORM row per source record; hour counts arrive as strings.
        return Event(
            station_id=record["station_id"],
            time_start=dt.datetime.fromisoformat(record["start"]),
            time_end=dt.datetime.fromisoformat(record["end"]),
            snow_on_ground=_snow_on_ground_status(record["sog"]),
            rain_hours=int(record["RA"]),
            freezing_rain_hours=int(record["FZRA"]),
            solid_precipitation_hours=int(record["SOLID"]),
            unknown_precipitation_hours=int(record["UP"]),
        )

    session.add_all([_to_event(record) for record in events])
    session.commit()


def _station_location_wkt(station: dict[str, str]) -> str:
return f"SRID=4326;POINT({station['longitude']} {station['latitude']})"


def _snow_on_ground_status(sog_str: str) -> bool | None:
if sog_str == "":
return None
if sog_str.lower() == "true":
return True
if sog_str.lower() == "false":
return False

msg = f"Unexpected snow-on-ground value: {sog_str}"
raise RuntimeError(msg)
20 changes: 17 additions & 3 deletions src/aross_stations_db/db/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@
from sqlalchemy.orm import (
DeclarativeBase,
Mapped,
MappedAsDataclass,
mapped_column,
relationship,
)


class Base(DeclarativeBase):
class Base(MappedAsDataclass, DeclarativeBase):
    """Declarative base shared by all ORM tables.

    MappedAsDataclass makes mapped classes behave as dataclasses (generated
    ``__init__``/``__repr__`` from their ``Mapped`` fields).
    """

    pass


Expand Down Expand Up @@ -40,5 +41,18 @@ class Event(Base):
time_start: Mapped[dt.datetime] = mapped_column(primary_key=True)
time_end: Mapped[dt.datetime] = mapped_column(primary_key=True)

# TODO: More fields: duration,RA,UP,FZRA,SOLID,t2m_mean,t2m_min,t2m_max,sog
# Don't think we need to keep duration.
# Was there snow on the ground during this event? Only available after 2004 for some
# stations, never available for other stations.
snow_on_ground: Mapped[bool | None] = mapped_column(index=True)

# During how many hours of this event was rain (or other event type) detected? These
# precipitation types are detected by a horizontal beam that the precipitation falls
# through.
rain_hours: Mapped[int] = mapped_column(index=True)
freezing_rain_hours: Mapped[int] = mapped_column(index=True)
# Solid precipitation = snow, ice, graupel, hail, etc.
solid_precipitation_hours: Mapped[int] = mapped_column(index=True)
unknown_precipitation_hours: Mapped[int] = mapped_column(index=True)

# TODO: More fields: duration,t2m_mean,t2m_min,t2m_max
# I don't think we need to keep duration.
Loading