Skip to content

Commit

Permalink
Merge pull request #694 from zigouras/issue-630
Browse files Browse the repository at this point in the history
Issue 630: Dockerize postgres
  • Loading branch information
nlebovits authored Jun 17, 2024
2 parents f87259e + 3751f43 commit 5574819
Show file tree
Hide file tree
Showing 10 changed files with 423 additions and 345 deletions.
12 changes: 9 additions & 3 deletions data/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,24 @@ FROM python:3.11.4
# Set the working directory in the container
WORKDIR /usr/src/app

# Install system dependencies for GDAL and Tippecanoe
# postgresql-client for psql and pg_dump executables for backups
# Install system dependencies for GDAL and Tippecanoe
# Packages are listed alphabetically so additions and removals show up as
# one-line diffs. lsb-release provides the `lsb_release` command that the
# PostgreSQL apt repository setup further down relies on.
RUN apt-get update \
    && apt-get install -y \
        build-essential \
        gcc \
        git \
        libgdal-dev \
        libsqlite3-dev \
        lsb-release \
        postgresql-client \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install PostgreSQL client 16 (psql and pg_dump, used for backups) from the
# PGDG apt repository. The major version should match the PostgreSQL server
# version used in the other Dockerfile (Dockerfile-pg).
#
# Everything happens in a single layer:
# - the repository key is downloaded to a file and then de-armored, so a failed
#   download fails the build (in a `curl | gpg` pipe under /bin/sh only gpg's
#   exit status is checked);
# - `apt-get update` and `apt-get install` run together so a cached, stale
#   package index is never reused by a later install step;
# - the package index is removed in the same layer to keep the image small.
RUN echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" \
        > /etc/apt/sources.list.d/pgdg.list \
    && curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc -o /tmp/pgdg.asc \
    && gpg --dearmor -o /etc/apt/trusted.gpg.d/postgresql.gpg /tmp/pgdg.asc \
    && rm /tmp/pgdg.asc \
    && apt-get update \
    && apt-get install -y postgresql-client-16 \
    && rm -rf /var/lib/apt/lists/*

# Set GDAL environment variables
ENV GDAL_VERSION=3.6.2
ENV GDAL_CONFIG=/usr/bin/gdal-config
Expand Down
26 changes: 26 additions & 0 deletions data/Dockerfile-pg
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#
# NOTE: THIS DOCKERFILE IS GENERATED VIA "make update"! PLEASE DO NOT EDIT IT DIRECTLY.
#

# PostgreSQL 16 (Debian bullseye) base image; PostGIS is added on top of it below.
FROM postgres:16-bullseye

LABEL maintainer="PostGIS Project - https://postgis.net" \
      org.opencontainers.image.description="PostGIS 3.4.2+dfsg-1.pgdg110+1 spatial database extension with PostgreSQL 16 bullseye" \
      org.opencontainers.image.source="https://github.com/postgis/docker-postgis"

# PostGIS package coordinates. POSTGIS_VERSION pins the exact Debian package
# build installed below. Written in key=value form; the space-separated
# `ENV key value` form is legacy syntax.
ENV POSTGIS_MAJOR=3 \
    POSTGIS_VERSION=3.4.2+dfsg-1.pgdg110+1

# PG_MAJOR is not defined in this file; it is expected to be provided by the
# postgres base image. `apt-cache showpkg` prints the candidate package details
# to the build log before the pinned install.
RUN apt-get update \
      && apt-cache showpkg postgresql-$PG_MAJOR-postgis-$POSTGIS_MAJOR \
      && apt-get install -y --no-install-recommends \
           # ca-certificates: for accessing remote raster files;
           #   fix: https://github.com/postgis/docker-postgis/issues/307
           ca-certificates \
           \
           postgresql-$PG_MAJOR-postgis-$POSTGIS_MAJOR=$POSTGIS_VERSION \
           postgresql-$PG_MAJOR-postgis-$POSTGIS_MAJOR-scripts \
      && rm -rf /var/lib/apt/lists/*

# Make sure the init-script directory exists; docker-compose.yml mounts
# init_pg.sql into it.
RUN mkdir -p /docker-entrypoint-initdb.d

20 changes: 19 additions & 1 deletion data/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
version: "3.8"
services:
vacant-lots-proj:
build: .
Expand Down Expand Up @@ -34,3 +33,22 @@ services:
- ./src:/usr/src/app
- ~/.config/gcloud/application_default_credentials.json:/app/service-account-key.json
command: sh -c "pipenv run python streetview.py"

# PostgreSQL/PostGIS database service, built from Dockerfile-pg in this directory.
postgres:
container_name: cagp-postgres
build:
context: .
dockerfile: Dockerfile-pg
environment:
# No value is given here, so Compose resolves POSTGRES_PASSWORD from the
# environment in which Compose itself is run.
- POSTGRES_PASSWORD
restart: always
ports:
# Publish the container's port 5432 on host port 5432.
- "5432:5432"
volumes:
# Named volume (declared under the top-level `volumes:` key below) mounted at
# the PostgreSQL data directory.
- database_volume:/var/lib/postgresql/data
# Initialization script that creates the database and enables PostGIS.
# NOTE(review): the postgres image documents that files in
# /docker-entrypoint-initdb.d run only when the data directory is empty — confirm.
- ./init_pg.sql:/docker-entrypoint-initdb.d/init_pg.sql
extra_hosts:
# Maps host.docker.internal to the Docker host gateway inside the container.
- host.docker.internal:host-gateway

# Named volumes used by the services above.
volumes:
database_volume:
3 changes: 3 additions & 0 deletions data/init_pg.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Create the application database, switch the psql session to it, and enable
-- the PostGIS extension in that database.
CREATE DATABASE vacantlotdb;
\connect vacantlotdb
CREATE EXTENSION postgis;
592 changes: 317 additions & 275 deletions data/src/Pipfile.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion data/src/classes/diff_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def send_report_to_slack(self):
client.chat_postMessage(
channel=report_to_slack_channel,
text=self.report,
username="CAGP Diff Report Bot",
username="CAGP Diff Bot",
)

def email_report(self):
Expand Down
8 changes: 6 additions & 2 deletions data/src/classes/featurelayer.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import os
import subprocess
import traceback
import sqlalchemy as sa

import geopandas as gpd
import pandas as pd
import requests
from config.psql import conn
from config.psql import conn, local_engine
from esridump.dumper import EsriDumper
from google.cloud import storage
from google.cloud.storage.bucket import Bucket
Expand All @@ -20,7 +21,7 @@ def google_cloud_bucket() -> Bucket:
Returns:
Bucket: the gcp bucket
"""
credentials_path = os.path.expanduser("/app/account-service-key.json")
credentials_path = os.path.expanduser("/app/service-account-key.json")

if not os.path.exists(credentials_path):
raise FileNotFoundError(f"Credentials file not found at {credentials_path}")
Expand Down Expand Up @@ -88,6 +89,9 @@ def __init__(

def check_psql(self):
try:
if not sa.inspect(local_engine).has_table(self.psql_table):
print(f"Table {self.psql_table} does not exist")
return False
psql_table = gpd.read_postgis(
f"SELECT * FROM {self.psql_table}", conn, geom_col="geometry"
)
Expand Down
8 changes: 7 additions & 1 deletion data/src/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,19 @@
from pathlib import Path

FORCE_RELOAD = False
""" During the data load, whether to query the various GIS API services for the data to load into the postgres tables. If True, will query the API services, backup the database, reload the database and report on data differences. If false will read the data from postgres."""

USE_CRS = "EPSG:2272"
""" the standard geospatial code for Pennsylvania South (ftUS) """

MAPBOX_TOKEN = os.environ.get("CFP_MAPBOX_TOKEN_UPLOADER")
""" The location of the token for your mapbox account in your environment """

log_level: int = logging.WARN
""" overall log level for the project """

max_backup_schema_days: int = 365
""" max days to keep backed up schemas archived in plsql """
""" max days to keep backed up schemas archived in psql """

report_to_slack_channel: str = ""
""" if this is not blank, send the data-diff summary report to this Slack channel.
Expand All @@ -31,6 +36,7 @@
tile_file_backup_directory: str = "backup"
""" The name of the directory in GCP to store timestamped backups of the tiles file """


def is_docker() -> bool:
"""
whether we are running in Docker or not, e.g. in ide or cl environment
Expand Down
2 changes: 1 addition & 1 deletion data/src/data_utils/dev_probability.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def dev_probability(primary_featurelayer):

# Classify development probability using Jenks natural breaks
breaks = jenkspy.jenks_breaks(census_bgs_gdf['permit_count'], n_classes=3)
census_bgs_gdf['dev_rank'] = pd.cut(census_bgs_gdf['permit_count'], bins=breaks, labels=['Low', 'Medium', 'High'])
census_bgs_gdf['dev_rank'] = pd.cut(census_bgs_gdf['permit_count'], bins=breaks, labels=['Low', 'Medium', 'High']).astype(str)

updated_census_bgs = FeatureLayer(
name="Updated Census Block Groups",
Expand Down
Loading

0 comments on commit 5574819

Please sign in to comment.