Skip to content

Commit

Permalink
print method
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Jan 5, 2024
1 parent 8b9b017 commit f7be735
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 19 deletions.
17 changes: 13 additions & 4 deletions src/gene/etl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,13 @@ def perform_etl(self, use_existing: bool = False) -> List[str]:
uploaded.
"""
self._extract_data(use_existing)
_logger.info(f"Transforming and loading {self._src_name} data to DB...")
if not self._silent:
click.echo("Transforming and loading data to DB...")
self._print_info(
f"Transforming and loading {self._src_name.value} data to DB..."
)
self._add_meta()
self._transform_data()
self._database.complete_write_transaction()
_logger.info(f"Data load complete for {self._src_name}.")
self._print_info(f"Data load complete for {self._src_name.value}.")
return self._processed_ids

def _extract_data(self, use_existing: bool) -> None:
Expand Down Expand Up @@ -248,3 +248,12 @@ def _get_seq_id_aliases(self, seq_id: str) -> List[str]:
except KeyError as e:
_logger.warning(f"SeqRepo raised KeyError: {e}")
return aliases

def _print_info(self, msg: str) -> None:
    """Report a status message to the log and, unless silenced, the console.

    The message is always sent to the module logger at INFO level. It is
    additionally echoed to the console when ``self._silent`` is falsy.

    :param msg: message to print
    """
    console_enabled = not self._silent
    if console_enabled:
        # Console output comes first, matching the order of the log record.
        click.echo(msg)
    _logger.info(msg)
4 changes: 1 addition & 3 deletions src/gene/etl/ensembl.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from typing import Dict, Optional
from urllib.parse import unquote

import click
import gffpandas.gffpandas as gffpd
import pandas as pd
from tqdm import tqdm
Expand Down Expand Up @@ -54,8 +53,7 @@ def _transform_data(self) -> None:

gene_df = df[df["ID"].str.startswith("gene", na=False)]

if not self._silent:
click.echo(f"Loading rows from {self._data_file}:")
self._print_info(f"Loading rows from {self._data_file}:")
for _, row in tqdm(
gene_df.iterrows(), total=gene_df.shape[0], disable=self._silent, ncols=80
):
Expand Down
4 changes: 1 addition & 3 deletions src/gene/etl/hgnc.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import re
from typing import Dict, List

import click
from tqdm import tqdm

from gene.etl.base import Base, GeneNormalizerEtlError
Expand Down Expand Up @@ -32,8 +31,7 @@ def _transform_data(self) -> None:

records = data["response"]["docs"]

if not self._silent:
click.echo(f"Loading rows from {self._data_file}:")
self._print_info(f"Loading rows from {self._data_file}:")
for r in tqdm(records, total=len(records), disable=self._silent, ncols=80):
gene = {
"concept_id": r["hgnc_id"].lower(),
Expand Down
13 changes: 4 additions & 9 deletions src/gene/etl/ncbi.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from typing import Any, Dict, List, Optional
from urllib.parse import unquote

import click
import gffpandas.gffpandas as gffpd
import pandas as pd
from tqdm import tqdm
Expand Down Expand Up @@ -83,8 +82,7 @@ def _get_prev_symbols(self) -> Dict[str, str]:
history = csv.reader(history_file, delimiter="\t")
next(history)
prev_symbols = {}
if not self._silent:
click.echo(f"Gathering previous symbols from {self._history_src}")
self._print_info(f"Gathering previous symbols from {self._history_src}")
for row in tqdm(history, total=length, ncols=80, disable=self._silent):
if row[0] != "9606":
continue # humans only
Expand Down Expand Up @@ -150,8 +148,7 @@ def _get_gene_info(self, prev_symbols: Dict[str, str]) -> Dict[str, Dict]:
info = csv.reader(info_file, delimiter="\t")
next(info)

if not self._silent:
click.echo(f"Extracting genes from {self._info_src}")
self._print_info(f"Extracting genes from {self._info_src}")
for row in tqdm(info, total=length, ncols=80, disable=self._silent):
params: Dict[str, Any] = {
"concept_id": f"{NamespacePrefix.NCBI.value}:{row[1]}",
Expand Down Expand Up @@ -196,8 +193,7 @@ def _get_gene_gff(self, df: pd.DataFrame, info_genes: Dict) -> None:
:param info_genes: A dictionary of genes from the NCBI info file.
"""
genes_df = df[df["ID"].str.startswith("gene", na=False)]
if not self._silent:
click.echo(f"Extracting genes from {self._info_src}")
self._print_info(f"Extracting genes from {self._info_src}")
for _, row in tqdm(
genes_df.iterrows(), total=genes_df.shape[0], ncols=80, disable=self._silent
):
Expand Down Expand Up @@ -440,8 +436,7 @@ def _transform_data(self) -> None:
df = gffpd.read_gff3(self._gff_src).attributes_to_columns()
self._get_gene_gff(df, info_genes)

if not self._silent:
click.echo("Loading completed gene objects...")
self._print_info("Loading completed gene objects...")
for gene in tqdm(
info_genes.values(), total=len(info_genes), disable=self._silent, ncols=80
):
Expand Down

0 comments on commit f7be735

Please sign in to comment.