Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: correctly configure logging #349

Merged
merged 1 commit into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 25 additions & 9 deletions src/gene/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,16 @@
from gene.database.database import DatabaseException
from gene.schemas import SourceName

logger = logging.getLogger("gene")
logger.setLevel(logging.DEBUG)
_logger = logging.getLogger(__name__)


def _configure_logging() -> None:
"""Configure logging."""
logging.basicConfig(
filename=f"{__package__}.log",
format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s",
)
logging.getLogger(__package__).setLevel(logging.DEBUG)


@click.command()
Expand All @@ -32,6 +40,7 @@ def check_db(db_url: str, verbose: bool = False) -> None:
:param db_url: URL to normalizer database
:param verbose: if true, print result to console
""" # noqa: D301
_configure_logging()
db = create_db(db_url, False)
if not db.check_schema_initialized():
if verbose:
Expand All @@ -43,8 +52,10 @@ def check_db(db_url: str, verbose: bool = False) -> None:
click.echo("Health check failed: DB is incompletely populated.")
click.get_current_context().exit(1)

msg = "DB health check successful: tables appear complete."
if verbose:
click.echo("DB health check successful: tables appear complete.")
click.echo(msg)
_logger.info(msg)


@click.command()
Expand All @@ -59,6 +70,7 @@ def update_from_remote(data_url: Optional[str], db_url: str) -> None:
:param data_url: user-specified location to pull DB dump from
:param db_url: URL to normalizer database
""" # noqa: D301
_configure_logging()
if not click.confirm("Are you sure you want to overwrite existing data?"):
click.get_current_context().exit()
if not data_url:
Expand All @@ -74,6 +86,7 @@ def update_from_remote(data_url: Optional[str], db_url: str) -> None:
except DatabaseException as e:
click.echo(f"Encountered exception during update: {str(e)}")
click.get_current_context().exit(1)
_logger.info("Successfully loaded data from remote snapshot.")


@click.command()
Expand All @@ -91,6 +104,7 @@ def dump_database(output_directory: Path, db_url: str) -> None:
:param output_directory: path to existing directory
:param db_url: URL to normalizer database
""" # noqa: D301
_configure_logging()
if not output_directory:
output_directory = Path(".")

Expand All @@ -105,6 +119,7 @@ def dump_database(output_directory: Path, db_url: str) -> None:
except DatabaseException as e:
click.echo(f"Encountered exception during update: {str(e)}")
click.get_current_context().exit(1)
_logger.info("Database dump successful.")


def _update_normalizer(
Expand Down Expand Up @@ -140,14 +155,14 @@ def _delete_source(n: SourceName, db: AbstractDatabase) -> float:
"""
msg = f"Deleting {n.value}..."
click.echo(f"\n{msg}")
logger.info(msg)
_logger.info(msg)
start_delete = timer()
db.delete_source(n)
end_delete = timer()
delete_time = end_delete - start_delete
msg = f"Deleted {n.value} in {delete_time:.5f} seconds."
click.echo(f"{msg}\n")
logger.info(msg)
_logger.info(msg)
return delete_time


Expand All @@ -172,7 +187,7 @@ def _load_source(
"""
msg = f"Loading {n.value}..."
click.echo(msg)
logger.info(msg)
_logger.info(msg)
start_load = timer()

# used to get source class name from string
Expand All @@ -190,17 +205,17 @@ def _load_source(
try:
processed_ids += source.perform_etl(use_existing)
except GeneNormalizerEtlError as e:
logger.error(e)
_logger.error(e)
click.echo(f"Encountered error while loading {n}: {e}.")
click.get_current_context().exit()
end_load = timer()
load_time = end_load - start_load
msg = f"Loaded {n.value} in {load_time:.5f} seconds."
click.echo(msg)
logger.info(msg)
_logger.info(msg)
msg = f"Total time for {n.value}: {(delete_time + load_time):.5f} seconds."
click.echo(msg)
logger.info(msg)
_logger.info(msg)


def _delete_normalized_data(database: AbstractDatabase) -> None:
Expand Down Expand Up @@ -286,6 +301,7 @@ def update_normalizer_db(
:param update_merged: if true, update normalized records
:param use_existing: if True, use most recent local data instead of fetching latest version
""" # noqa: D301
_configure_logging()
db = create_db(db_url, aws_instance)

if update_all:
Expand Down
22 changes: 11 additions & 11 deletions src/gene/database/dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
)
from gene.schemas import RecordType, RefType, SourceMeta, SourceName

logger = logging.getLogger(__name__)
_logger = logging.getLogger(__name__)


class DynamoDbDatabase(AbstractDatabase):
Expand Down Expand Up @@ -156,7 +156,7 @@ def check_schema_initialized(self) -> bool:
existing_tables = self.list_tables()
exists = self.gene_table in existing_tables
if not exists:
logger.info(f"{self.gene_table} table is missing or unavailable.")
_logger.info(f"{self.gene_table} table is missing or unavailable.")
return exists

def check_tables_populated(self) -> bool:
Expand All @@ -173,7 +173,7 @@ def check_tables_populated(self) -> bool:
KeyConditionExpression=Key("item_type").eq("source"),
).get("Items", [])
if len(sources) < len(SourceName):
logger.info("Gene sources table is missing expected sources.")
_logger.info("Gene sources table is missing expected sources.")
return False

records = self.genes.query(
Expand All @@ -182,7 +182,7 @@ def check_tables_populated(self) -> bool:
Limit=1,
)
if len(records.get("Items", [])) < 1:
logger.info("Gene records index is empty.")
_logger.info("Gene records index is empty.")
return False

normalized_records = self.genes.query(
Expand All @@ -191,7 +191,7 @@ def check_tables_populated(self) -> bool:
Limit=1,
)
if len(normalized_records.get("Items", [])) < 1:
logger.info("Normalized gene records index is empty.")
_logger.info("Normalized gene records index is empty.")
return False

return True
Expand Down Expand Up @@ -253,7 +253,7 @@ def get_record_by_id(
del record["label_and_type"]
return record
except ClientError as e:
logger.error(
_logger.error(
f"boto3 client error on get_records_by_id for "
f"search term {concept_id}: "
f"{e.response['Error']['Message']}"
Expand All @@ -276,7 +276,7 @@ def get_refs_by_type(self, search_term: str, ref_type: RefType) -> List[str]:
matches = self.genes.query(KeyConditionExpression=filter_exp)
return [m["concept_id"] for m in matches.get("Items", None)]
except ClientError as e:
logger.error(
_logger.error(
f"boto3 client error on get_refs_by_type for "
f"search term {search_term}: "
f"{e.response['Error']['Message']}"
Expand Down Expand Up @@ -380,7 +380,7 @@ def add_record(self, record: Dict, src_name: SourceName) -> None:
try:
self.batch.put_item(Item=record)
except ClientError as e:
logger.error(
_logger.error(
"boto3 client error on add_record for "
f"{concept_id}: {e.response['Error']['Message']}"
)
Expand Down Expand Up @@ -412,7 +412,7 @@ def add_merged_record(self, record: Dict) -> None:
try:
self.batch.put_item(Item=record)
except ClientError as e:
logger.error(
_logger.error(
"boto3 client error on add_record for "
f"{concept_id}: {e.response['Error']['Message']}"
)
Expand All @@ -438,7 +438,7 @@ def _add_ref_record(
try:
self.batch.put_item(Item=record)
except ClientError as e:
logger.error(
_logger.error(
f"boto3 client error adding reference {term} for "
f"{concept_id} with match type {ref_type}: "
f"{e.response['Error']['Message']}"
Expand Down Expand Up @@ -470,7 +470,7 @@ def update_merge_ref(self, concept_id: str, merge_ref: Any) -> None: # noqa: AN
f"No such record exists for keys {label_and_type}, {concept_id}"
)
else:
logger.error(
_logger.error(
f"boto3 client error in `database.update_record()`: "
f"{e.response['Error']['Message']}"
)
Expand Down
22 changes: 11 additions & 11 deletions src/gene/database/postgresql.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
)
from gene.schemas import RecordType, RefType, SourceMeta, SourceName

logger = logging.getLogger(__name__)
_logger = logging.getLogger(__name__)


SCRIPTS_DIR = Path(__file__).parent / "postgresql"
Expand Down Expand Up @@ -119,7 +119,7 @@ def drop_db(self) -> None:
with self.conn.cursor() as cur:
cur.execute(self._drop_db_query)
self.conn.commit()
logger.info("Dropped all existing gene normalizer tables.")
_logger.info("Dropped all existing gene normalizer tables.")

def check_schema_initialized(self) -> bool:
"""Check if database schema is properly initialized.
Expand All @@ -132,7 +132,7 @@ def check_schema_initialized(self) -> bool:
except DuplicateTable:
self.conn.rollback()
else:
logger.info("Gene table existence check failed.")
_logger.info("Gene table existence check failed.")
self.conn.rollback()
return False
try:
Expand All @@ -141,7 +141,7 @@ def check_schema_initialized(self) -> bool:
except DuplicateObject:
self.conn.rollback()
else:
logger.info("Gene foreign key existence check failed.")
_logger.info("Gene foreign key existence check failed.")
self.conn.rollback()
return False
try:
Expand All @@ -152,7 +152,7 @@ def check_schema_initialized(self) -> bool:
except DuplicateTable:
self.conn.rollback()
else:
logger.info("Gene normalized view lookup failed.")
_logger.info("Gene normalized view lookup failed.")
self.conn.rollback()
return False
try:
Expand All @@ -161,7 +161,7 @@ def check_schema_initialized(self) -> bool:
except DuplicateTable:
self.conn.rollback()
else:
logger.info("Gene indexes check failed.")
_logger.info("Gene indexes check failed.")
self.conn.rollback()
return False

Expand All @@ -184,21 +184,21 @@ def check_tables_populated(self) -> bool:
cur.execute(self._check_sources_query)
results = cur.fetchall()
if len(results) < len(SourceName):
logger.info("Gene sources table is missing expected sources.")
_logger.info("Gene sources table is missing expected sources.")
return False

with self.conn.cursor() as cur:
cur.execute(self._check_concepts_query)
result = cur.fetchone()
if not result or result[0] < 1:
logger.info("Gene records table is empty.")
_logger.info("Gene records table is empty.")
return False

with self.conn.cursor() as cur:
cur.execute(self._check_merged_query)
result = cur.fetchone()
if not result or result[0] < 1:
logger.info("Normalized gene records table is empty.")
_logger.info("Normalized gene records table is empty.")
return False

return True
Expand Down Expand Up @@ -260,7 +260,7 @@ def _drop_indexes(self) -> None:

def _create_tables(self) -> None:
"""Create all tables, indexes, and views."""
logger.debug("Creating new gene normalizer tables.")
_logger.debug("Creating new gene normalizer tables.")
tables_query = (SCRIPTS_DIR / "create_tables.sql").read_bytes()

with self.conn.cursor() as cur:
Expand Down Expand Up @@ -594,7 +594,7 @@ def add_record(self, record: Dict, src_name: SourceName) -> None:
cur.execute(self._ins_symbol_query, [record["symbol"], concept_id])
self.conn.commit()
except UniqueViolation:
logger.error(f"Record with ID {concept_id} already exists")
_logger.error(f"Record with ID {concept_id} already exists")
self.conn.rollback()

_add_merged_record_query = b"""
Expand Down
11 changes: 6 additions & 5 deletions src/gene/etl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
from gene.database import AbstractDatabase
from gene.schemas import Gene, GeneSequenceLocation, MatchType, SourceName

logger = logging.getLogger("gene")
logger.setLevel(logging.DEBUG)
_logger = logging.getLogger(__name__)


DATA_DISPATCH = {
Expand Down Expand Up @@ -70,6 +69,7 @@ def perform_etl(self, use_existing: bool = False) -> List[str]:
uploaded.
"""
self._extract_data(use_existing)
_logger.info("Transforming and loading %s data to DB...", self._src_name.value)
if not self._silent:
click.echo("Transforming and loading data to DB...")
self._add_meta()
Expand All @@ -88,6 +88,7 @@ def _extract_data(self, use_existing: bool) -> None:
self._data_file, self._version = self._data_source.get_latest(
from_local=use_existing
)
_logger.info("Acquired data for %s: %s", self._src_name.value, self._data_file)

@abstractmethod
def _transform_data(self) -> None:
Expand All @@ -110,7 +111,7 @@ def _load_gene(self, gene: Dict) -> None:
try:
assert Gene(match_type=MatchType.NO_MATCH, **gene)
except pydantic.ValidationError as e:
logger.warning(f"Unable to load {gene} due to validation error: " f"{e}")
_logger.warning(f"Unable to load {gene} due to validation error: " f"{e}")
else:
concept_id = gene["concept_id"]
gene["label_and_type"] = f"{concept_id.lower()}##identity"
Expand Down Expand Up @@ -211,7 +212,7 @@ def _get_seq_id_aliases(self, seq_id: str) -> List[str]:
try:
aliases = self.seqrepo.translate_alias(seq_id, target_namespaces="ga4gh")
except KeyError as e:
logger.warning(f"SeqRepo raised KeyError: {e}")
_logger.warning(f"SeqRepo raised KeyError: {e}")
return aliases

def _get_sequence_location(self, seq_id: str, gene: Feature, params: Dict) -> Dict:
Expand All @@ -238,7 +239,7 @@ def _get_sequence_location(self, seq_id: str, gene: Feature, params: Dict) -> Di
sequence_id=sequence,
).model_dump() # type: ignore
else:
logger.warning(
_logger.warning(
f"{params['concept_id']} has invalid interval:"
f"start={gene.start - 1} end={gene.end}"
) # type: ignore
Expand Down
Loading
Loading