From ff08239a054897067755adbf622a5dcfd1c8486d Mon Sep 17 00:00:00 2001 From: Zac Deziel Date: Thu, 5 Sep 2024 11:54:42 -0700 Subject: [PATCH] Refactor database environment variables to postgres nomenclature (#46) --- .github/workflows/ci.yml | 12 ++++++------ README.md | 14 +++++++------- docs/acceptance/db.md | 12 ++++++------ postgres/download_parquet.sh | 10 +++++----- postgres/load_nyc_sample.sh | 8 ++++---- postgres/load_to_prod.sh | 8 +++----- space2stats_api/cdk/settings.py | 12 ++++++------ space2stats_api/src/space2stats/main.py | 4 ++-- space2stats_api/src/space2stats/settings.py | 18 +++++++++--------- space2stats_api/src/tests/test_api.py | 12 ++++++------ 10 files changed, 54 insertions(+), 56 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0e2775f..c0f22fa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,9 +35,9 @@ jobs: run: | poetry run pytest -s -vv env: - DB_HOST: localhost - DB_PORT: 5432 - DB_NAME: mydatabase - DB_USER: myuser - DB_PASSWORD: mypassword - DB_TABLE_NAME: space2stats \ No newline at end of file + PGHOST: localhost + PGPORT: 5432 + PGDATABASE: mydatabase + PGUSER: myuser + PGPASSWORD: mypassword + PGTABLENAME: space2stats \ No newline at end of file diff --git a/README.md b/README.md index c7e0844..ce146f4 100644 --- a/README.md +++ b/README.md @@ -11,12 +11,12 @@ docker-compose up -d - Create a `db.env` file: ```.env -DB_HOST=localhost -DB_PORT=5439 -DB_NAME=postgis -DB_USER=username -DB_PASSWORD=password -DB_TABLE_NAME=space2stats +PGHOST=localhost +PGPORT=5432 +PGDATABASE=postgis +PGUSER=username +PGPASSWORD=password +PGTABLENAME=space2stats ``` - Load our dataset into the database @@ -26,7 +26,7 @@ python postgres/chunk_parquet.py ./postgres/load_parquet_chunks.sh ``` -> You can get started with a subset of data for NYC with `./load_nyc_sample.sh` which requires changing your `db.env` value for `DB_TABLE_NAME` to `space2stats_nyc_sample`. 
+> You can get started with a subset of data for NYC with `./load_nyc_sample.sh` which requires changing your `db.env` value for `PGTABLENAME` to `space2stats_nyc_sample`. - Access your data using the Space2statS API! See the [example notebook](notebooks/space2stats_api_demo.ipynb). diff --git a/docs/acceptance/db.md b/docs/acceptance/db.md index a4ef460..c980f0c 100644 --- a/docs/acceptance/db.md +++ b/docs/acceptance/db.md @@ -27,12 +27,12 @@ Alternatively, you can connect to a remote database, such as the [Tembo database Set the database environment variables in `db.env`: ```bash -DB_HOST=localhost -DB_PORT=5432 -DB_NAME=postgis -DB_USER=postgres -DB_PASSWORD=password -DB_TABLE_NAME=space2stats +PGHOST=localhost +PGPORT=5432 +PGDATABASE=postgis +PGUSER=postgres +PGPASSWORD=password +PGTABLENAME=space2stats ``` > Note: If using the `docker-compose` approach, the above configuration is suitable. diff --git a/postgres/download_parquet.sh b/postgres/download_parquet.sh index 434f269..02981e4 100644 --- a/postgres/download_parquet.sh +++ b/postgres/download_parquet.sh @@ -9,11 +9,11 @@ PARQUET_FILE="Space2Stats/parquet/GLOBAL/combined_population.parquet" LOCAL_PARQUET_FILE="space2stats.parquet" # PostgreSQL configuration -DB_HOST="${MY_DOCKER_IP:-127.0.0.1}" -DB_PORT=5439 -DB_NAME="postgis" -DB_USER="username" -DB_PASSWORD="password" +PGHOST="${MY_DOCKER_IP:-127.0.0.1}" +PGPORT=5439 +PGDATABASE="postgis" +PGUSER="username" +PGPASSWORD="password" # Download Parquet file from S3 echo "Downloading Parquet file from S3..." 
diff --git a/postgres/load_nyc_sample.sh b/postgres/load_nyc_sample.sh index 276956c..78c5613 100755 --- a/postgres/load_nyc_sample.sh +++ b/postgres/load_nyc_sample.sh @@ -6,7 +6,7 @@ if [ -f db.env ]; then fi # Check if required environment variables are set -if [ -z "$DB_HOST" ] || [ -z "$DB_PORT" ] || [ -z "$DB_NAME" ] || [ -z "$DB_USER" ] || [ -z "$DB_PASSWORD" ]; then +if [ -z "$PGHOST" ] || [ -z "$PGPORT" ] || [ -z "$PGDATABASE" ] || [ -z "$PGUSER" ] || [ -z "$PGPASSWORD" ]; then echo "One or more required environment variables are missing." exit 1 fi @@ -18,21 +18,21 @@ PARQUET_FILE="nyc_sample.parquet" TABLE_NAME="space2stats_nyc_sample" # Check if the table exists -TABLE_EXISTS=$(psql -h $DB_HOST -p $DB_PORT -d $DB_NAME -U $DB_USER -tAc "SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_schema='public' AND table_name='$TABLE_NAME');") +TABLE_EXISTS=$(psql -h $PGHOST -p $PGPORT -d $PGDATABASE -U $PGUSER -tAc "SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_schema='public' AND table_name='$TABLE_NAME');") echo "Importing $PARQUET_FILE..." 
if [ "$TABLE_EXISTS" = "t" ]; then # Table exists, append data ogr2ogr -f "PostgreSQL" \ - PG:"host=$DB_HOST port=$DB_PORT dbname=$DB_NAME user=$DB_USER password=$DB_PASSWORD" \ + PG:"host=$PGHOST port=$PGPORT dbname=$PGDATABASE user=$PGUSER password=$PGPASSWORD" \ "$PARQUET_FILE" \ -nln $TABLE_NAME \ -append else # Table does not exist, create table and import data ogr2ogr -f "PostgreSQL" \ - PG:"host=$DB_HOST port=$DB_PORT dbname=$DB_NAME user=$DB_USER password=$DB_PASSWORD" \ + PG:"host=$PGHOST port=$PGPORT dbname=$PGDATABASE user=$PGUSER password=$PGPASSWORD" \ "$PARQUET_FILE" \ -nln $TABLE_NAME diff --git a/postgres/load_to_prod.sh b/postgres/load_to_prod.sh index 9dedc00..c78063f 100755 --- a/postgres/load_to_prod.sh +++ b/postgres/load_to_prod.sh @@ -7,7 +7,7 @@ if [ -f db.env ]; then fi # Check if required environment variables are set -if [ -z "$DB_HOST" ] || [ -z "$DB_PORT" ] || [ -z "$DB_NAME" ] || [ -z "$DB_USER" ] || [ -z "$DB_PASSWORD" ]; then +if [ -z "$PGHOST" ] || [ -z "$PGPORT" ] || [ -z "$PGDATABASE" ] || [ -z "$PGUSER" ] || [ -z "$PGPASSWORD" ]; then echo "One or more required environment variables are missing." 
exit 1 fi @@ -15,16 +15,14 @@ fi # Directory containing the Parquet chunks CHUNKS_DIR="parquet_chunks" -# Name of the target table -TABLE_NAME="space2stats" PARQUET_FILE=space2stats.parquet echo "Starting" ogr2ogr -progress -f "PostgreSQL" \ - PG:"host=$DB_HOST port=$DB_PORT dbname=$DB_NAME user=$DB_USER password=$DB_PASSWORD" \ + PG:"host=$PGHOST port=$PGPORT dbname=$PGDATABASE user=$PGUSER password=$PGPASSWORD" \ "$PARQUET_FILE" \ - -nln $TABLE_NAME \ + -nln $PGTABLENAME \ -append \ -lco SPATIAL_INDEX=NONE diff --git a/space2stats_api/cdk/settings.py b/space2stats_api/cdk/settings.py index fd22267..a5f2162 100644 --- a/space2stats_api/cdk/settings.py +++ b/space2stats_api/cdk/settings.py @@ -2,12 +2,12 @@ class AppSettings(BaseSettings): - DB_HOST: str - DB_PORT: str - DB_NAME: str - DB_USER: str - DB_PASSWORD: str - DB_TABLE_NAME: str + PGHOST: str + PGPORT: str + PGDATABASE: str + PGUSER: str + PGPASSWORD: str + PGTABLENAME: str class DeploymentSettings(BaseSettings): CDK_DEFAULT_ACCOUNT: str diff --git a/space2stats_api/src/space2stats/main.py b/space2stats_api/src/space2stats/main.py index 93ad60e..accb45f 100644 --- a/space2stats_api/src/space2stats/main.py +++ b/space2stats_api/src/space2stats/main.py @@ -32,7 +32,7 @@ def _get_summaries(fields: List[str], h3_ids: List[str], conn: Connection): FROM {1} WHERE hex_id = ANY (%s) """ - ).format(pg.sql.SQL(", ").join(cols), pg.sql.Identifier(settings.DB_TABLE_NAME)) + ).format(pg.sql.SQL(", ").join(cols), pg.sql.Identifier(settings.PGTABLENAME)) # Convert h3_ids to a list to ensure compatibility with psycopg h3_ids = list(h3_ids) @@ -105,7 +105,7 @@ def get_available_fields(conn: Connection) -> List[str]: cur.execute( sql_query, [ - settings.DB_TABLE_NAME, + settings.PGTABLENAME, ], ) columns = [row[0] for row in cur.fetchall() if row[0] != "hex_id"] diff --git a/space2stats_api/src/space2stats/settings.py b/space2stats_api/src/space2stats/settings.py index 4159ad1..a791091 100644 --- 
a/space2stats_api/src/space2stats/settings.py +++ b/space2stats_api/src/space2stats/settings.py @@ -2,12 +2,12 @@ class Settings(BaseSettings): - DB_HOST: str - DB_PORT: int - DB_NAME: str - DB_USER: str - DB_PASSWORD: str - DB_TABLE_NAME: str + PGHOST: str + PGPORT: int + PGDATABASE: str + PGUSER: str + PGPASSWORD: str + PGTABLENAME: str # see https://www.psycopg.org/psycopg3/docs/api/pool.html#the-connectionpool-class for options DB_MIN_CONN_SIZE: int = 1 @@ -24,9 +24,9 @@ class Settings(BaseSettings): @property def DB_CONNECTION_STRING(self) -> str: - host_port = f"host={self.DB_HOST} port={self.DB_PORT}" - db_user = f"dbname={self.DB_NAME} user={self.DB_USER}" - return f"{host_port} {db_user} password={self.DB_PASSWORD}" + host_port = f"host={self.PGHOST} port={self.PGPORT}" + db_user = f"dbname={self.PGDATABASE} user={self.PGUSER}" + return f"{host_port} {db_user} password={self.PGPASSWORD}" model_config = { "env_file": "local_db.env", diff --git a/space2stats_api/src/tests/test_api.py b/space2stats_api/src/tests/test_api.py index 3d87a2a..0f0aa53 100644 --- a/space2stats_api/src/tests/test_api.py +++ b/space2stats_api/src/tests/test_api.py @@ -39,12 +39,12 @@ def database(postgresql_proc): @pytest.fixture(autouse=True) def client(monkeypatch, database): - monkeypatch.setenv("DB_HOST", database.host) - monkeypatch.setenv("DB_PORT", str(database.port)) - monkeypatch.setenv("DB_NAME", database.dbname) - monkeypatch.setenv("DB_USER", database.user) - monkeypatch.setenv("DB_PASSWORD", database.password) - monkeypatch.setenv("DB_TABLE_NAME", "space2stats") + monkeypatch.setenv("PGHOST", database.host) + monkeypatch.setenv("PGPORT", str(database.port)) + monkeypatch.setenv("PGDATABASE", database.dbname) + monkeypatch.setenv("PGUSER", database.user) + monkeypatch.setenv("PGPASSWORD", database.password) + monkeypatch.setenv("PGTABLENAME", "space2stats") from space2stats.app import app