Skip to content

Commit

Permalink
Merge pull request #151 from cancervariants/issue-146-main
Browse files Browse the repository at this point in the history
refactor!: remove elastic beanstalk work on main branch
  • Loading branch information
korikuzma authored Oct 26, 2022
2 parents dfb280e + 4d63054 commit 4e915eb
Show file tree
Hide file tree
Showing 10 changed files with 77 additions and 231 deletions.
18 changes: 0 additions & 18 deletions EBSampleApp-Python.iml

This file was deleted.

1 change: 0 additions & 1 deletion Procfile

This file was deleted.

52 changes: 0 additions & 52 deletions codebuild/deploy_eb_env.py

This file was deleted.

52 changes: 0 additions & 52 deletions codebuild/deploy_eb_env_dev.py

This file was deleted.

30 changes: 0 additions & 30 deletions codebuild/terminate_eb_env.py

This file was deleted.

30 changes: 0 additions & 30 deletions codebuild/terminate_eb_env_dev.py

This file was deleted.

5 changes: 0 additions & 5 deletions cron.yaml

This file was deleted.

15 changes: 3 additions & 12 deletions gene/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
"""The VICC library for normalizing genes."""
from .version import __version__ # noqa: F401
from pathlib import Path
import logging
from os import environ

APP_ROOT = Path(__file__).resolve().parents[0]
from .version import __version__ # noqa: F401

if "GENE_NORM_EB_PROD" in environ:
LOG_FN = "/tmp/gene.log"
else:
LOG_FN = "gene.log"
APP_ROOT = Path(__file__).resolve().parents[0]

logging.basicConfig(
filename=LOG_FN,
filename="gene.log",
format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s")
logger = logging.getLogger("gene")
logger.setLevel(logging.DEBUG)
Expand All @@ -25,11 +21,6 @@
logging.getLogger("biocommons.seqrepo.seqaliasdb.seqaliasdb").setLevel(logging.INFO) # noqa: E501
logging.getLogger("biocommons.seqrepo.fastadir.fastadir").setLevel(logging.INFO) # noqa: E501

if "GENE_NORM_EB_PROD" in environ:
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
logger.addHandler(ch)


SEQREPO_DATA_PATH = environ.get("SEQREPO_DATA_PATH",
"/usr/local/share/seqrepo/latest")
Expand Down
27 changes: 17 additions & 10 deletions gene/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from boto3.dynamodb.conditions import Key

from gene import SOURCES
from gene.database import Database, confirm_aws_db_use
from gene.database import Database, confirm_aws_db_use, SKIP_AWS_DB_ENV_NAME, \
VALID_AWS_ENV_NAMES, AWS_ENV_VAR_NAME
from gene.etl import NCBI, HGNC, Ensembl # noqa: F401
from gene.etl.merge import Merge
from gene.schemas import SourceName
Expand All @@ -28,9 +29,9 @@ class CLI:
help="The normalizer(s) you wish to update separated by spaces."
)
@click.option(
'--prod',
'--aws_instance',
is_flag=True,
help="Working in production environment."
help="Using AWS DynamodDB instance."
)
@click.option(
'--db_url',
Expand All @@ -46,15 +47,21 @@ class CLI:
is_flag=True,
help='Update concepts for normalize endpoint from accepted sources.'
)
def update_normalizer_db(normalizer, prod, db_url, update_all,
def update_normalizer_db(normalizer, aws_instance, db_url, update_all,
update_merged):
"""Update selected normalizer source(s) in the gene database."""
# Sometimes GENE_NORM_EB_PROD is accidentally set. We should verify that
if "GENE_NORM_EB_PROD" in environ:
confirm_aws_db_use("PROD")

if prod:
environ['GENE_NORM_PROD'] = "TRUE"
# If SKIP_AWS_CONFIRMATION is accidentally set, we should verify that the
# aws instance should actually be used
invalid_aws_msg = f"{AWS_ENV_VAR_NAME} must be set to one of {VALID_AWS_ENV_NAMES}" # noqa: E501
aws_env_var_set = False
if AWS_ENV_VAR_NAME in environ:
aws_env_var_set = True
assert environ[AWS_ENV_VAR_NAME] in VALID_AWS_ENV_NAMES, invalid_aws_msg
confirm_aws_db_use(environ[AWS_ENV_VAR_NAME].upper())

if aws_env_var_set or aws_instance:
assert AWS_ENV_VAR_NAME in environ, invalid_aws_msg
environ[SKIP_AWS_DB_ENV_NAME] = "true" # this is already checked above
db: Database = Database()
else:
if db_url:
Expand Down
78 changes: 57 additions & 21 deletions gene/database.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,50 @@
"""This module creates the database."""
from gene import PREFIX_LOOKUP
from boto3.dynamodb.conditions import Key
from botocore.exceptions import ClientError
from enum import Enum
import sys
import logging
from os import environ
from typing import List, Optional, Dict, Any, Set

import boto3
import click
import sys
import logging
from boto3.dynamodb.conditions import Key
from botocore.exceptions import ClientError

from gene import PREFIX_LOOKUP


# NOTE(review): getLogger() with no name returns the root logger, so the
# DEBUG level set below is applied to the root logger rather than to a
# package-scoped one -- confirm this is intended.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Name of the environment variable that selects the AWS instance to use.
# Its value can be set to either `Dev`, `Staging`, or `Prod`.
# ONLY set it when you want to access an AWS instance.
AWS_ENV_VAR_NAME = "GENE_NORM_ENV"

# Name of the environment variable that, when set to "true", skips the
# database confirmation check. Should ONLY be used for deployment needs.
SKIP_AWS_DB_ENV_NAME = "SKIP_AWS_CONFIRMATION"


class AwsEnvName(str, Enum):
    """Names of the AWS environments that can be selected.

    Members are also ``str`` instances, so a member compares equal to its
    plain string value (e.g. ``AwsEnvName.DEVELOPMENT == "Dev"``).
    """

    # Member values are the exact strings expected in the environment variable.
    DEVELOPMENT = "Dev"
    STAGING = "Staging"
    PRODUCTION = "Prod"


# Set of every string value defined by AwsEnvName.
VALID_AWS_ENV_NAMES = {member.value for member in AwsEnvName}


def confirm_aws_db_use(env_name: str) -> None:
    """Ask the user to confirm that the AWS database should be used.

    Prompts interactively (default answer is "no"). If the user declines,
    prints a message and terminates the process via ``sys.exit()``;
    otherwise prints a banner naming the environment and returns.

    :param str env_name: AWS environment name shown in the prompt and banner
    """
    prompt = f"Are you sure you want to use the AWS {env_name} database?"
    confirmed = click.confirm(prompt, default=False)

    # Guard clause: bail out before touching the database if not confirmed.
    if not confirmed:
        click.echo("Exiting.")
        sys.exit()

    click.echo(f"***GENE AWS {env_name.upper()} DATABASE IN USE***")


class Database:
"""The database class."""
Expand All @@ -23,19 +55,23 @@ def __init__(self, db_url: str = '', region_name: str = 'us-east-2'):
:param str db_url: URL endpoint for DynamoDB source
:param str region_name: default AWS region
"""
if 'GENE_NORM_PROD' in environ or 'GENE_NORM_EB_PROD' in environ:
gene_concepts_table = "gene_concepts" # default
gene_metadata_table = "gene_metadata" # default
if AWS_ENV_VAR_NAME in environ:
aws_env = environ[AWS_ENV_VAR_NAME]
assert aws_env in VALID_AWS_ENV_NAMES, f"{AWS_ENV_VAR_NAME} must be one of {VALID_AWS_ENV_NAMES}" # noqa: E501

skip_confirmation = environ.get(SKIP_AWS_DB_ENV_NAME)
if (not skip_confirmation) or (skip_confirmation and skip_confirmation != "true"): # noqa: E501
confirm_aws_db_use(environ[AWS_ENV_VAR_NAME])

boto_params = {
'region_name': region_name
"region_name": region_name
}
if 'GENE_NORM_EB_PROD' not in environ:
# EB Instance should not have to confirm.
# This is used only for updating production via CLI
if click.confirm("Are you sure you want to use the "
"production database?", default=False):
click.echo("***GENE PRODUCTION DATABASE IN USE***")
else:
click.echo("Exiting.")
sys.exit()

if aws_env == AwsEnvName.DEVELOPMENT:
gene_concepts_table = "gene_concepts_nonprod"
gene_metadata_table = "gene_metadata_nonprod"
else:
if db_url:
endpoint_url = db_url
Expand All @@ -52,13 +88,13 @@ def __init__(self, db_url: str = '', region_name: str = 'us-east-2'):
self.dynamodb = boto3.resource('dynamodb', **boto_params)
self.dynamodb_client = boto3.client('dynamodb', **boto_params)

# Create tables if nonexistent if not connecting to production database
if 'GENE_NORM_PROD' not in environ and\
'GENE_NORM_EB_PROD' not in environ and 'TEST' not in environ:
# Only create tables for local instance
envs_do_not_create_tables = {AWS_ENV_VAR_NAME, "TEST"}
if not set(envs_do_not_create_tables) & set(environ):
self.create_db_tables()

self.genes = self.dynamodb.Table('gene_concepts')
self.metadata = self.dynamodb.Table('gene_metadata')
self.genes = self.dynamodb.Table(gene_concepts_table)
self.metadata = self.dynamodb.Table(gene_metadata_table)
self.batch = self.genes.batch_writer()
self.cached_sources = {}

Expand Down

0 comments on commit 4e915eb

Please sign in to comment.