diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..c10c784 --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +_data/ diff --git a/.github/workflows/cd.yml b/.github/workflows/publish.yml similarity index 75% rename from .github/workflows/cd.yml rename to .github/workflows/publish.yml index 3eb5bf5..5fca65d 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/publish.yml @@ -20,20 +20,19 @@ env: FORCE_COLOR: 3 jobs: - dist: - name: Distribution build + build-python-package: + name: Build Python package runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: hynek/build-and-inspect-python-package@v2 - publish: - needs: [dist] - name: Publish to PyPI + publish-python-package: + name: Publish Python package to PyPI + needs: ["build-python-package"] environment: pypi permissions: id-token: write @@ -52,3 +51,9 @@ jobs: # Remember to tell (test-)pypi about this repo before publishing # Remove this line to publish to PyPI repository-url: https://test.pypi.org/legacy/ + + build-and-publish-docker-image: + name: "Build and publish Docker image" + needs: ["build-python-package"] + uses: "nsidc/.github/.github/workflows/build-and-publish-container-image.yml@main" + secrets: "inherit" diff --git a/.gitignore b/.gitignore index e4a68a7..49bc55c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Specific to this project: ## Local db data volume mount for development: _data/ +compose.override.yml # Byte-compiled / optimized / DLL files diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..ced75a3 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,10 @@ +FROM python:3.12-alpine + +RUN apk add git + +WORKDIR /app +ADD . . 
+ +RUN pip install --editable ".[dev,test,docs]" + +ENTRYPOINT ["aross-stations-db"] diff --git a/README.md b/README.md index 2f3ee59..c3cee06 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,8 @@ [![GitHub Discussion][github-discussions-badge]][github-discussions-link] -Reads ASOS station data from disk on the NSIDC archive to create a temporally and -geospatially indexed database to quickly search events. +Reads Automated Surface Observation Station (ASOS) data from disk on the NSIDC archive +to create a temporally and geospatially indexed database to quickly search events. > [!NOTE] > TODO: Is this data available publicly and documented? How is it produced? Links! @@ -18,64 +18,29 @@ geospatially indexed database to quickly search events. ## Install -For now, you can install from this repository. +To get started quickly, [install Docker](https://docs.docker.com/engine/install/). -> [!NOTE] -> TODO: Publish to PyPI if we decide this package should continue to exist and not be -> moved or renamed :) - -> [!NOTE] -> TODO: Dockerize the package - -```bash -pip install git+https://github.com/nsidc/aross-stations-db.git -``` - -> [!WARNING] -> When installed this way, `.env` - - -### Dev install - -> [!NOTE] -> Don't worry about this unless you intend to change the code! - -> [!NOTE] -> TODO: Extract dev stuff to separate doc? OR make a collapsible dev quickstart in the -> README? - -With this method, you can change the source code and the changes will be reflected -without needing to re-install. -```bash -pip install --editable ".[dev]" -``` +## Usage +Everything presumes the current working directory is the root of this repo unless +otherwise stated. -#### Checks +
Dev quickstart -If installed correctly, you can now lint & format the code, which is run through -`pre-commit` because these tasks don't require installing the package: +> :bangbang: Don't worry about this unless you intend to change the code! -```bash -pre-commit run --all-files -``` +**View +[our contributing docs](https://aross-stations-db.readthedocs.io/en/latest/contributing.html) +for more details!** -You can also type-check and test the code, which are run through `nox` because this -_does_ require installing the package: +Use the pre-configured dev compose configuration: ```bash -nox +ln -s compose.dev.yml compose.override.yml ``` -> [!TIP] -> To reuse an already-created Nox env, add `-R`. - - -## Usage - -Everything presumes the current working directory is the root of this repo unless -otherwise stated. +
### Set envvars @@ -83,18 +48,31 @@ otherwise stated. Create a `.env` file or otherwise `export` the envvars. Your `.env` file might look like this if you're running a local database: +> [!IMPORTANT] +> `$AROSS_DATA_BASEDIR` should be Andy's data directory containing expected "metadata" +> and "events" subdirectories. TODO: Document how that data is created! _How can the +> public access it?_ + +> [!NOTE] +> The connection string shown here is for connecting within the Docker network to a +> container with the hostname `db`. + ```bash POSTGRES_PASSWORD="supersecret" -AROSS_DB_CONNSTR="postgresql+psycopg://aross:${POSTGRES_PASSWORD}@localhost:5432/aross" -# NOTE: This dir should contain "metadata" and "events" subdirectories: +AROSS_DB_CONNSTR="postgresql+psycopg://aross:${POSTGRES_PASSWORD}@db:5432/aross" AROSS_DATA_BASEDIR="/path/to/aross-data-dir" ``` -### Start a fresh database +### Start the application stack -This repo provides a quickstart database in Docker, defined in `compose.yml`. Once -started, you (and our code!) can connect on port `5432`. +The stack is configured within `compose.yml` and includes containers: + +* `aross-stations-db`: A [PostGIS](https://postgis.net/) database for quickly storing + and accessing event records. +* `aross-stations-admin`: An [Adminer](https://www.adminer.org/) container for + inspecting the database in the browser. +* `aross-stations-api`: An HTTP API for accessing data in the database. ```bash docker compose up --detach @@ -103,8 +81,8 @@ docker compose up --detach ### Inspect the database -You can use the included [Adminer](https://www.adminer.org/) container for quick -inspection. Navigate in your browser to `http://localhost:80` and enter: +You can use the included Adminer container for quick inspection. Navigate in your +browser to `http://localhost:8080` and enter: * System: PostgreSQL * Server: `aross-stations-db` @@ -120,12 +98,15 @@ inspection. 
Navigate in your browser to `http://localhost:80` and enter: ### Run ingest ```bash -aross-stations-db init # Create empty tables (deleting any pre-existing ones) -aross-stations-db load # Load the tables from event files +docker compose run cli init # Create empty tables (deleting any pre-existing ones) +docker compose run cli load # Load the tables from event files ``` From a fast disk, this should take under 2 minutes. + +### :sparkles: Check out the data! + Now, you can use Adminer's SQL Query menu to select some data:
@@ -149,21 +130,8 @@ where ```
- -### Run API - -> [!NOTE] -> TODO: Dockerize this component - -```bash -fastapi run src/aross_stations_db/api -``` - -Or, to get hot reloading for development: - -```bash -fastapi dev src/aross_stations_db/api -``` +Or you can check out the API docs in your browser at `http://localhost:8000/docs` or +submit an HTTP query:
Example HTTP query @@ -181,6 +149,23 @@ docker compose down ``` +### Start over + +Remove the `_data/` directory to start over with a fresh database. + + +### View logs + +In this example, we view and follow logs for the `api` service: + +```bash +docker compose logs --follow api +``` + +You can replace `api` with any other service name, or omit it to view logs for all +services. + + ## Troubleshooting ### `Permission denied` errors from FastAPI diff --git a/compose.dev.yml b/compose.dev.yml new file mode 100644 index 0000000..6dd7419 --- /dev/null +++ b/compose.dev.yml @@ -0,0 +1,12 @@ +services: + api: + image: "nsidc/aross-stations-db:dev" + build: "." + volumes: + - "${PWD}:/app" + + cli: + image: "nsidc/aross-stations-db:dev" + build: "." # TODO: Dev compose + volumes: + - "${PWD}:/app" diff --git a/compose.yml b/compose.yml index d43b015..50afe2b 100644 --- a/compose.yml +++ b/compose.yml @@ -1,29 +1,57 @@ +x-common: &common + logging: + options: + max-size: "20m" + max-file: "5" + services: - aross-stations-db: - image: "postgis/postgis:16-3.4-alpine" - container_name: "aross-stations-db" + api: + container_name: "aross-stations-api" + depends_on: ["db"] + image: "nsidc/aross-stations-db" # TODO: Real tag! 
+ <<: *common + entrypoint: "fastapi" + command: ["dev", "--host", "0.0.0.0", "./src/aross_stations_db/api"] ports: - # TODO: Don't expose in prod - - "5432:5432" + - "8000:8000" + + + db: + container_name: "aross-stations-db" + image: "postgis/postgis:16-3.4-alpine" + <<: *common + environment: POSTGRES_DB: "aross" POSTGRES_USER: "aross" POSTGRES_PASSWORD: null volumes: - "./_data:/var/lib/postgresql/data" - logging: - options: - max-size: "20m" - max-file: "5" - aross-stations-db-admin-ui: - image: "adminer" + + admin: container_name: "aross-stations-admin" + depends_on: ["db"] + image: "adminer" + <<: *common + ports: - - "80:8080" + - "8080:8080" restart: "unless-stopped" - logging: - options: - max-size: "20m" - max-file: "5" + + + cli: + container_name: "aross-stations-cli" + depends_on: ["db"] + image: "nsidc/aross-stations-db" # TODO: Real tag! + <<: *common + + volumes: + - "${AROSS_DATA_BASEDIR}:/data" + environment: + AROSS_DATA_BASEDIR: "/data" + profiles: + # Prevents this "service" from running with `docker compose up`. + # Instead, it's intended to be used like `docker compose run cli --help`. + - "_" diff --git a/src/aross_stations_db/cli.py b/src/aross_stations_db/cli.py index 4996b7c..00aa499 100644 --- a/src/aross_stations_db/cli.py +++ b/src/aross_stations_db/cli.py @@ -2,7 +2,7 @@ from loguru import logger from sqlalchemy.orm import Session -from aross_stations_db.config import Settings +from aross_stations_db.config import CliLoadSettings, Settings from aross_stations_db.db import ( create_tables, load_events, @@ -37,7 +37,7 @@ def load() -> None: """Load the database tables from files on disk.""" # TODO: False-positive. Remove type-ignore. 
# See: https://github.com/pydantic/pydantic/issues/6713 - config = Settings() # type:ignore[call-arg] + config = CliLoadSettings() # type:ignore[call-arg] stations = get_stations(config.stations_metadata_filepath) events = get_events(config.events_dir) diff --git a/src/aross_stations_db/config.py b/src/aross_stations_db/config.py index ee75d02..3a75edb 100644 --- a/src/aross_stations_db/config.py +++ b/src/aross_stations_db/config.py @@ -10,11 +10,26 @@ class Settings(BaseSettings): + """The universal settings that every part of the app needs.""" + model_config = SettingsConfigDict(env_prefix="AROSS_") - DATA_BASEDIR: DirectoryPath DB_CONNSTR: PostgresDsn + @computed_field # type:ignore[misc] + @cached_property + def db_engine(self) -> Engine: + return create_engine(str(self.DB_CONNSTR)) + + +class CliLoadSettings(Settings): + """The settings only needed when loading data with the CLI. + + We need one extra setting for this because loading requires access to input data. + """ + + DATA_BASEDIR: DirectoryPath + # TODO: Specifically ignore this type of error instead of using type-ignore; but # mypy doesn't yet categorize this error in its own type, so we need to wait for a # release, likely 1.11: https://github.com/python/mypy/pull/16571/files @@ -27,9 +42,3 @@ def events_dir(self) -> DirectoryPath: @cached_property def stations_metadata_filepath(self) -> FilePath: return self.DATA_BASEDIR / "metadata" / "aross.asos_stations.metadata.csv" - - # TODO: Remove? - @computed_field # type:ignore[misc] - @cached_property - def db_engine(self) -> Engine: - return create_engine(str(self.DB_CONNSTR))