Skip to content

Commit

Permalink
Refactor database environment variables to postgres nomenclature (#46)
Browse files Browse the repository at this point in the history
  • Loading branch information
zacdezgeo authored Sep 5, 2024
1 parent 4ea7fae commit ff08239
Show file tree
Hide file tree
Showing 10 changed files with 54 additions and 56 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ jobs:
run: |
poetry run pytest -s -vv
env:
DB_HOST: localhost
DB_PORT: 5432
DB_NAME: mydatabase
DB_USER: myuser
DB_PASSWORD: mypassword
DB_TABLE_NAME: space2stats
PGHOST: localhost
PGPORT: 5432
PGDATABASE: mydatabase
PGUSER: myuser
PGPASSWORD: mypassword
PGTABLENAME: space2stats
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ docker-compose up -d

- Create a `db.env` file:
```.env
DB_HOST=localhost
DB_PORT=5439
DB_NAME=postgis
DB_USER=username
DB_PASSWORD=password
DB_TABLE_NAME=space2stats
PGHOST=localhost
PGPORT=5432
PGDATABASE=postgis
PGUSER=username
PGPASSWORD=password
PGTABLENAME=space2stats
```

- Load our dataset into the database
Expand All @@ -26,7 +26,7 @@ python postgres/chunk_parquet.py
./postgres/load_parquet_chunks.sh
```

> You can get started with a subset of data for NYC with `./load_nyc_sample.sh` which requires changing your `db.env` value for `DB_TABLE_NAME` to `space2stats_nyc_sample`.
> You can get started with a subset of data for NYC with `./load_nyc_sample.sh` which requires changing your `db.env` value for `PGTABLENAME` to `space2stats_nyc_sample`.
- Access your data using the Space2Stats API! See the [example notebook](notebooks/space2stats_api_demo.ipynb).

Expand Down
12 changes: 6 additions & 6 deletions docs/acceptance/db.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ Alternatively, you can connect to a remote database, such as the [Tembo database
Set the database environment variables in `db.env`:

```bash
DB_HOST=localhost
DB_PORT=5432
DB_NAME=postgis
DB_USER=postgres
DB_PASSWORD=password
DB_TABLE_NAME=space2stats
PGHOST=localhost
PGPORT=5432
PGDATABASE=postgis
PGUSER=postgres
PGPASSWORD=password
PGTABLENAME=space2stats
```

> Note: If using the `docker-compose` approach, the above configuration is suitable.
Expand Down
10 changes: 5 additions & 5 deletions postgres/download_parquet.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ PARQUET_FILE="Space2Stats/parquet/GLOBAL/combined_population.parquet"
LOCAL_PARQUET_FILE="space2stats.parquet"

# PostgreSQL configuration
DB_HOST="${MY_DOCKER_IP:-127.0.0.1}"
DB_PORT=5439
DB_NAME="postgis"
DB_USER="username"
DB_PASSWORD="password"
PGHOST="${MY_DOCKER_IP:-127.0.0.1}"
PGPORT=5439
PGDATABASE="postgis"
PGUSER="username"
PGPASSWORD="password"

# Download Parquet file from S3
echo "Downloading Parquet file from S3..."
Expand Down
8 changes: 4 additions & 4 deletions postgres/load_nyc_sample.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ if [ -f db.env ]; then
fi

# Check if required environment variables are set
if [ -z "$DB_HOST" ] || [ -z "$DB_PORT" ] || [ -z "$DB_NAME" ] || [ -z "$DB_USER" ] || [ -z "$DB_PASSWORD" ]; then
if [ -z "$PGHOST" ] || [ -z "$PGPORT" ] || [ -z "$PGDATABASE" ] || [ -z "$PGUSER" ] || [ -z "$PGPASSWORD" ]; then
echo "One or more required environment variables are missing."
exit 1
fi
Expand All @@ -18,21 +18,21 @@ PARQUET_FILE="nyc_sample.parquet"
TABLE_NAME="space2stats_nyc_sample"

# Check if the table exists
TABLE_EXISTS=$(psql -h $DB_HOST -p $DB_PORT -d $DB_NAME -U $DB_USER -tAc "SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_schema='public' AND table_name='$TABLE_NAME');")
TABLE_EXISTS=$(psql -h $PGHOST -p $PGPORT -d $PGDATABASE -U $PGUSER -tAc "SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_schema='public' AND table_name='$TABLE_NAME');")

echo "Importing $PARQUET_FILE..."

if [ "$TABLE_EXISTS" = "t" ]; then
# Table exists, append data
ogr2ogr -f "PostgreSQL" \
PG:"host=$DB_HOST port=$DB_PORT dbname=$DB_NAME user=$DB_USER password=$DB_PASSWORD" \
PG:"host=$PGHOST port=$PGPORT dbname=$PGDATABASE user=$PGUSER password=$PGPASSWORD" \
"$PARQUET_FILE" \
-nln $TABLE_NAME \
-append
else
# Table does not exist, create table and import data
ogr2ogr -f "PostgreSQL" \
PG:"host=$DB_HOST port=$DB_PORT dbname=$DB_NAME user=$DB_USER password=$DB_PASSWORD" \
PG:"host=$PGHOST port=$PGPORT dbname=$PGDATABASE user=$PGUSER password=$PGPASSWORD" \
"$PARQUET_FILE" \
-nln $TABLE_NAME

Expand Down
8 changes: 3 additions & 5 deletions postgres/load_to_prod.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,22 @@ if [ -f db.env ]; then
fi

# Check if required environment variables are set
if [ -z "$DB_HOST" ] || [ -z "$DB_PORT" ] || [ -z "$DB_NAME" ] || [ -z "$DB_USER" ] || [ -z "$DB_PASSWORD" ]; then
if [ -z "$PGHOST" ] || [ -z "$PGPORT" ] || [ -z "$PGDATABASE" ] || [ -z "$PGUSER" ] || [ -z "$PGPASSWORD" ]; then
echo "One or more required environment variables are missing."
exit 1
fi

# Directory containing the Parquet chunks
CHUNKS_DIR="parquet_chunks"

# Name of the target table
TABLE_NAME="space2stats"
PARQUET_FILE=space2stats.parquet

echo "Starting"

ogr2ogr -progress -f "PostgreSQL" \
PG:"host=$DB_HOST port=$DB_PORT dbname=$DB_NAME user=$DB_USER password=$DB_PASSWORD" \
PG:"host=$PGHOST port=$PGPORT dbname=$PGDATABASE user=$PGUSER password=$PGPASSWORD" \
"$PARQUET_FILE" \
-nln $TABLE_NAME \
-nln $PGTABLENAME \
-append \
-lco SPATIAL_INDEX=NONE

12 changes: 6 additions & 6 deletions space2stats_api/cdk/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@


class AppSettings(BaseSettings):
DB_HOST: str
DB_PORT: str
DB_NAME: str
DB_USER: str
DB_PASSWORD: str
DB_TABLE_NAME: str
PGHOST: str
PGPORT: str
PGDATABASE: str
PGUSER: str
PGPASSWORD: str
PGTABLENAME: str

class DeploymentSettings(BaseSettings):
CDK_DEFAULT_ACCOUNT: str
Expand Down
4 changes: 2 additions & 2 deletions space2stats_api/src/space2stats/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def _get_summaries(fields: List[str], h3_ids: List[str], conn: Connection):
FROM {1}
WHERE hex_id = ANY (%s)
"""
).format(pg.sql.SQL(", ").join(cols), pg.sql.Identifier(settings.DB_TABLE_NAME))
).format(pg.sql.SQL(", ").join(cols), pg.sql.Identifier(settings.PGTABLENAME))

# Convert h3_ids to a list to ensure compatibility with psycopg
h3_ids = list(h3_ids)
Expand Down Expand Up @@ -105,7 +105,7 @@ def get_available_fields(conn: Connection) -> List[str]:
cur.execute(
sql_query,
[
settings.DB_TABLE_NAME,
settings.PGTABLENAME,
],
)
columns = [row[0] for row in cur.fetchall() if row[0] != "hex_id"]
Expand Down
18 changes: 9 additions & 9 deletions space2stats_api/src/space2stats/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@


class Settings(BaseSettings):
DB_HOST: str
DB_PORT: int
DB_NAME: str
DB_USER: str
DB_PASSWORD: str
DB_TABLE_NAME: str
PGHOST: str
PGPORT: int
PGDATABASE: str
PGUSER: str
PGPASSWORD: str
PGTABLENAME: str

# see https://www.psycopg.org/psycopg3/docs/api/pool.html#the-connectionpool-class for options
DB_MIN_CONN_SIZE: int = 1
Expand All @@ -24,9 +24,9 @@ class Settings(BaseSettings):

@property
def DB_CONNECTION_STRING(self) -> str:
host_port = f"host={self.DB_HOST} port={self.DB_PORT}"
db_user = f"dbname={self.DB_NAME} user={self.DB_USER}"
return f"{host_port} {db_user} password={self.DB_PASSWORD}"
host_port = f"host={self.PGHOST} port={self.PGPORT}"
db_user = f"dbname={self.PGDATABASE} user={self.PGUSER}"
return f"{host_port} {db_user} password={self.PGPASSWORD}"

model_config = {
"env_file": "local_db.env",
Expand Down
12 changes: 6 additions & 6 deletions space2stats_api/src/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ def database(postgresql_proc):

@pytest.fixture(autouse=True)
def client(monkeypatch, database):
monkeypatch.setenv("DB_HOST", database.host)
monkeypatch.setenv("DB_PORT", str(database.port))
monkeypatch.setenv("DB_NAME", database.dbname)
monkeypatch.setenv("DB_USER", database.user)
monkeypatch.setenv("DB_PASSWORD", database.password)
monkeypatch.setenv("DB_TABLE_NAME", "space2stats")
monkeypatch.setenv("PGHOST", database.host)
monkeypatch.setenv("PGPORT", str(database.port))
monkeypatch.setenv("PGDATABASE", database.dbname)
monkeypatch.setenv("PGUSER", database.user)
monkeypatch.setenv("PGPASSWORD", database.password)
monkeypatch.setenv("PGTABLENAME", "space2stats")

from space2stats.app import app

Expand Down

0 comments on commit ff08239

Please sign in to comment.