Skip to content

Commit

Permalink
resolve conflicts and run black
Browse files Browse the repository at this point in the history
  • Loading branch information
linglp committed Jun 19, 2024
2 parents 29a00a2 + ddfb9d5 commit 3871961
Show file tree
Hide file tree
Showing 25 changed files with 815 additions and 493 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ jobs:
run: |
# ran only on certain files for now
# add here when checked
poetry run black schematic --check
poetry run black schematic tests schematic_api --check
#----------------------------------------------
# type checking/enforcement
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ repos:
# pre-commit's default_language_version, see
# https://pre-commit.com/#top_level-default_language_version
language_version: python3.10
files: schematic/
files: ^(tests|schematic|schematic_api)/
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,6 @@ model:
# This section is for using google sheets with Schematic
google_sheets:
# The Synapse id of the Google service account credentials.
service_acct_creds_synapse_id: "syn25171627"
# Path to the Google service account credentials file, either absolute or relative to this file
service_acct_creds: "schematic_service_account_creds.json"
# When doing google sheet validation (regex match) with the validation rules.
Expand Down
2 changes: 0 additions & 2 deletions config_example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ model:

# This section is for using google sheets with Schematic
google_sheets:
# The Synapse id of the Google service account credentials.
service_acct_creds_synapse_id: "syn25171627"
# Path to the Google service account credentials file, either absolute or relative to this file
service_acct_creds: "schematic_service_account_creds.json"
# When doing google sheet validation (regex match) with the validation rules.
Expand Down
8 changes: 0 additions & 8 deletions schematic/configuration/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,14 +164,6 @@ def model_location(self) -> str:
"""
return self._model_config.location

@property
def service_account_credentials_synapse_id(self) -> str:
"""
Returns:
str: The Synapse id of the Google service account credentials.
"""
return self._google_sheets_config.service_acct_creds_synapse_id

@property
def service_account_credentials_path(self) -> str:
"""
Expand Down
3 changes: 0 additions & 3 deletions schematic/configuration/dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,10 @@ class GoogleSheetsConfig:
strict_validation: Controls Google Sheets validation (regex match) against the validation rules.
If True, the user is alerted and bad values cannot be entered.
If False, the user is warned but the entry is still allowed onto the sheet.
service_acct_creds_synapse_id: The Synapse id of the Google service account credentials.
service_acct_creds: Path to the Google service account credentials,
either absolute or relative to this file
"""

service_acct_creds_synapse_id: str = "syn25171627"
service_acct_creds: str = "schematic_service_account_creds.json"
strict_validation: bool = True

Expand All @@ -151,7 +149,6 @@ def validate_string_is_not_empty(cls, value: str) -> str:
raise ValueError(f"{value} is an empty string")
return value

@validator("service_acct_creds_synapse_id")
@classmethod
def validate_synapse_id(cls, value: str) -> str:
"""Check if string is a valid synapse id
Expand Down
5 changes: 4 additions & 1 deletion schematic/store/synapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -1333,7 +1333,10 @@ def upload_manifest_file(
).id

synapseutils.copy_functions.changeFileMetaData(
syn=self.syn, entity=manifest_synapse_file_id, downloadAs=file_name_new
syn=self.syn,
entity=manifest_synapse_file_id,
downloadAs=file_name_new,
forceVersion=False,
)

return manifest_synapse_file_id
Expand Down
34 changes: 0 additions & 34 deletions schematic/utils/google_api_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from googleapiclient.discovery import build, Resource # type: ignore
from google.oauth2 import service_account # type: ignore
from schematic.configuration.configuration import CONFIG
from schematic.store.synapse import SynapseStorage

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -67,39 +66,6 @@ def build_service_account_creds() -> GoogleServiceAcountCreds:
return creds


def download_creds_file() -> None:
    """Fetch the Google service account credentials file from Synapse if needed.

    A Synapse login is always performed first. After that:

    * If the ``SERVICE_ACCOUNT_CREDS`` environment variable is set, nothing is
      downloaded and a reminder is logged.
    * Otherwise, if no file exists at
      ``CONFIG.service_account_credentials_path``, the Synapse entity named by
      ``CONFIG.service_account_credentials_synapse_id`` is downloaded next to
      that path and renamed onto it.
    * Otherwise the existing file is left untouched.
    """
    syn = SynapseStorage.login()

    if "SERVICE_ACCOUNT_CREDS" in os.environ:
        # The environment variable takes the place of a credentials file on
        # disk; tell the user which source is in effect.
        logger.info(
            "Using environment variable SERVICE_ACCOUNT_CREDS as the credential file."
        )
        return

    if os.path.exists(CONFIG.service_account_credentials_path):
        # Credentials are already on disk; no download required.
        return

    # Synapse ID of the 'schematic_service_account_creds.json' file.
    creds_synapse_id = CONFIG.service_account_credentials_synapse_id

    # Download into the parent directory of the configured path so that the
    # os.rename() below stays on a single file system.
    download_dir = os.path.dirname(CONFIG.service_account_credentials_path)
    downloaded = syn.get(creds_synapse_id, downloadLocation=download_dir)
    os.rename(downloaded.path, CONFIG.service_account_credentials_path)

    logger.info(
        "The credentials file has been downloaded "
        f"to '{CONFIG.service_account_credentials_path}'"
    )


@no_type_check
def execute_google_api_requests(service, requests_body, **kwargs) -> Any:
"""
Expand Down
246 changes: 126 additions & 120 deletions schematic_api/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,120 +1,126 @@
FROM tiangolo/uwsgi-nginx-flask:python3.10

# add version tag as a build argument
ARG TAG

# the environment variables defined here are the default
# and can be overwritten by docker run -e VARIABLE = XX
# or can be overwritten by .env when using docker compose
ENV PYTHONFAULTHANDLER=1 \
PYTHONUNBUFFERED=1 \
PYTHONHASHSEED=random \
PIP_NO_CACHE_DIR=off \
PIP_DISABLE_PIP_VERSION_CHECK=on \
PIP_DEFAULT_TIMEOUT=200 \
POETRY_VERSION=1.3.0 \
APP_PARENT_DIR=/app \
NGINX_CONFIG=/etc/nginx/conf.d \
APP_DIR=/app/app \
ROOT=/ \
UWSGI_INI=/app/uwsgi.ini \
NGINX_WORKER_PROCESSES=1 \
VERSION=$TAG

# Note:
# The starting number of uWSGI processes is controlled by the variable UWSGI_CHEAPER, by default set to 2.
# The maximum number of uWSGI processes is controlled by the variable UWSGI_PROCESSES, by default set to 16
# By default, the image starts with 2 uWSGI processes running. When the server is experiencing a high load, it creates up to 16 uWSGI processes to handle it on demand.
# NGINX_MAX_UPLOAD is set to 0 by default that allows unlimited upload file sizes
# NGINX_WORKER_CONNECTIONS is set to 1024 by default that allows a maximum limit of 1024 connections per worker.
# NGINX_WORKER_OPEN_FILES is set to 2048 by default that allows 2048 open files

# run open ssl and generate certificate
RUN apt update && \
apt-get install openssl && \
openssl req -x509 -nodes -days 365 \
-subj "/C=CA/ST=QC/O=Company" \
-newkey rsa:2048 -keyout /etc/ssl/private/localhost.key \
-out /etc/ssl/certs/localhost.crt;

# add dhparam.pem
# this step takes quite some time
RUN openssl dhparam -out /etc/ssl/dhparam.pem 4096

# copy config files that handle encryption to docker
WORKDIR ${NGINX_CONFIG}
COPY ./self-signed.conf ./ssl-params.conf ./certificate.conf ./


# use custom uwsgi-nginx-entrypoint.sh
# this uwsgi-nginx-entrypoint.sh file is derived from: https://github.com/tiangolo/uwsgi-nginx-flask-docker/blob/master/docker-images/entrypoint.sh
# we have to modify it so that we could generate a different /etc/nginx/conf.d/nginx.conf file
WORKDIR ${ROOT}
COPY ./uwsgi-nginx-entrypoint.sh ./entrypoint2.sh
COPY ./uwsgi-nginx-entrypoint.sh ./uwsgi-nginx-entrypoint2.sh
RUN chmod +x uwsgi-nginx-entrypoint2.sh
RUN chmod +x entrypoint2.sh
RUN chown -R nginx /uwsgi-nginx-entrypoint2.sh
RUN chown -R nginx /entrypoint2.sh

# install poetry
RUN pip install --no-cache-dir "poetry==$POETRY_VERSION"

# set work directory
WORKDIR ${APP_PARENT_DIR}
RUN chown www-data:www-data ${APP_PARENT_DIR}

# remove the old uwsgi.ini and main.py from the original image
RUN rm -rf ${APP_PARENT_DIR}/main.py
RUN rm -rf ${APP_PARENT_DIR}/uwsgi.ini

# copy to use custom uwsgi.ini
COPY ./uwsgi.ini ./

# create a separate folder called app
RUN mkdir app
WORKDIR ${APP_DIR}

# copy other files to app/app
# Note: run_api.py is not needed

COPY ./pyproject.toml ./poetry.lock ./main.py ./
COPY ./config_example.yml ./config.yml
RUN poetry config virtualenvs.create false
RUN poetry install --no-interaction --all-extras --no-root

# copy schematic_api folder
COPY schematic_api ./schematic_api

# copy great_expectations folder
COPY great_expectations ./great_expectations

# copy tests folder because some endpoints by default download to the tests folder
COPY tests ./tests

# change permission
RUN chown -R www-data:www-data ${APP_DIR}

# allow downloading to synapse cache
RUN chown -R www-data:www-data /root

# copy schematic
COPY schematic ./schematic

# change permission
WORKDIR /var/www/
#The -R option: make the command recursive, so it will change the owner of all files and subdirectories within a given folder.
RUN chown -R www-data:www-data /var/www/

RUN chown -R www-data:www-data /var/tmp/

# change work directory back
WORKDIR ${APP_DIR}

# specify entrypoint again to generate config
# have to respecify CMD too
ENTRYPOINT ["/entrypoint2.sh"]
CMD ["/start.sh"]

# Expose ports
EXPOSE 443
FROM tiangolo/uwsgi-nginx-flask:python3.10

# add version tag as a build argument
ARG TAG

# The environment variables defined here are defaults.
# They can be overridden with `docker run -e VARIABLE=XX`
# or with a .env file when using docker compose.
ENV PYTHONFAULTHANDLER=1 \
PYTHONUNBUFFERED=1 \
PYTHONHASHSEED=random \
PIP_NO_CACHE_DIR=off \
PIP_DISABLE_PIP_VERSION_CHECK=on \
PIP_DEFAULT_TIMEOUT=200 \
POETRY_VERSION=1.3.0 \
APP_PARENT_DIR=/app \
NGINX_CONFIG=/etc/nginx/conf.d \
APP_DIR=/app/app \
ROOT=/ \
UWSGI_INI=/app/uwsgi.ini \
NGINX_WORKER_PROCESSES=1 \
VERSION=$TAG



LABEL maintainer="Lingling Peng <[email protected]> Andrew Lamb <[email protected]> Gianna Jordan <[email protected]>"
LABEL version=$TAG


# Note:
# The starting number of uWSGI processes is controlled by the variable UWSGI_CHEAPER, by default set to 2.
# The maximum number of uWSGI processes is controlled by the variable UWSGI_PROCESSES, by default set to 16
# By default, the image starts with 2 uWSGI processes running. When the server is experiencing a high load, it creates up to 16 uWSGI processes to handle it on demand.
# NGINX_MAX_UPLOAD is set to 0 by default, which allows unlimited upload file sizes.
# NGINX_WORKER_CONNECTIONS is set to 1024 by default, which limits each worker to 1024 connections.
# NGINX_WORKER_OPEN_FILES is set to 2048 by default, which allows up to 2048 open files.

# Install openssl and generate a self-signed certificate (valid 365 days).
# - apt-get is used instead of apt because apt's CLI is not meant for scripts.
# - -y keeps the install non-interactive; without it the build aborts as soon
#   as apt asks for confirmation.
# - The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update && \
    apt-get install -y openssl && \
    rm -rf /var/lib/apt/lists/* && \
    openssl req -x509 -nodes -days 365 \
    -subj "/C=CA/ST=QC/O=Company" \
    -newkey rsa:2048 -keyout /etc/ssl/private/localhost.key \
    -out /etc/ssl/certs/localhost.crt

# add dhparam.pem
# this step takes quite some time
RUN openssl dhparam -out /etc/ssl/dhparam.pem 4096

# copy config files that handle encryption to docker
WORKDIR ${NGINX_CONFIG}
COPY ./self-signed.conf ./ssl-params.conf ./certificate.conf ./


# use custom uwsgi-nginx-entrypoint.sh
# this uwsgi-nginx-entrypoint.sh file is derived from: https://github.com/tiangolo/uwsgi-nginx-flask-docker/blob/master/docker-images/entrypoint.sh
# we have to modify it so that we could generate a different /etc/nginx/conf.d/nginx.conf file
WORKDIR ${ROOT}
COPY ./uwsgi-nginx-entrypoint.sh ./entrypoint2.sh
COPY ./uwsgi-nginx-entrypoint.sh ./uwsgi-nginx-entrypoint2.sh
RUN chmod +x uwsgi-nginx-entrypoint2.sh
RUN chmod +x entrypoint2.sh
RUN chown -R nginx /uwsgi-nginx-entrypoint2.sh
RUN chown -R nginx /entrypoint2.sh

# install poetry
RUN pip install --no-cache-dir "poetry==$POETRY_VERSION"

# set work directory
WORKDIR ${APP_PARENT_DIR}
RUN chown www-data:www-data ${APP_PARENT_DIR}

# remove the old uwsgi.ini and main.py from the original image
RUN rm -rf ${APP_PARENT_DIR}/main.py
RUN rm -rf ${APP_PARENT_DIR}/uwsgi.ini

# copy to use custom uwsgi.ini
COPY ./uwsgi.ini ./

# create a separate folder called app
RUN mkdir app
WORKDIR ${APP_DIR}

# copy other files to app/app
# Note: run_api.py is not needed

COPY ./pyproject.toml ./poetry.lock ./main.py ./
COPY ./config_example.yml ./config.yml
RUN poetry config virtualenvs.create false
RUN poetry install --no-interaction --all-extras --no-root

# copy schematic_api folder
COPY schematic_api ./schematic_api

# copy great_expectations folder
COPY great_expectations ./great_expectations

# copy tests folder because some endpoints by default download to the tests folder
COPY tests ./tests

# change permission
RUN chown -R www-data:www-data ${APP_DIR}

# allow downloading to synapse cache
RUN chown -R www-data:www-data /root

# copy schematic
COPY schematic ./schematic

# change permission
WORKDIR /var/www/
# The -R option makes chown recursive, so it changes the owner of all files and subdirectories within the given folder.
RUN chown -R www-data:www-data /var/www/

RUN chown -R www-data:www-data /var/tmp/

# change work directory back
WORKDIR ${APP_DIR}

# Specify the entrypoint again so the custom script generates the nginx config.
# CMD must be restated too: setting ENTRYPOINT resets any CMD inherited from the base image.
ENTRYPOINT ["/entrypoint2.sh"]
CMD ["/start.sh"]

# Expose ports
EXPOSE 443
Loading

0 comments on commit 3871961

Please sign in to comment.