diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2b0ba8a9..53c39223 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.2 + rev: v0.1.9 hooks: - id: ruff - id: ruff-format diff --git a/README.md b/README.md index 2e2af4ed..ab4da8af 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,9 @@ Gene Normalizer ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/gene-normalizer?color=gr) [![tests](https://github.com/cancervariants/gene-normalization/actions/workflows/github-actions.yml/badge.svg)](https://github.com/cancervariants/gene-normalization/actions/workflows/github-actions.yml) [![DOI](https://zenodo.org/badge/309797998.svg)](https://zenodo.org/badge/latestdoi/309797998) ## Overview - + The Gene Normalizer provides tools for resolving ambiguous human gene references to consistently-structured, normalized terms. For gene concepts extracted from [NCBI Gene](https://www.ncbi.nlm.nih.gov/gene/), [Ensembl](https://useast.ensembl.org/index.html), and [HGNC](https://www.genenames.org/), it designates a [CURIE](https://en.wikipedia.org/wiki/CURIE), and provides additional metadata like current and previously-used symbols, aliases, database cross-references and associations, and coordinates. - + --- **[Live service](https://normalize.cancervariants.org/gene)** diff --git a/pyproject.toml b/pyproject.toml index df44570c..2788672a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ dynamic = ["version"] pg = ["psycopg[binary]"] etl = ["gffutils", "biocommons.seqrepo", "wags-tails>=0.1.1"] test = ["pytest>=6.0", "pytest-cov", "mock", "httpx"] -dev = ["pre-commit", "ruff>=0.1.2"] +dev = ["pre-commit", "ruff>=0.1.9"] docs = [ "sphinx==6.1.3", "sphinx-autodoc-typehints==1.22.0", diff --git a/src/gene/etl/base.py b/src/gene/etl/base.py index 768165ae..c152c152 100644 --- a/src/gene/etl/base.py +++ b/src/gene/etl/base.py @@ -6,6 +6,7 @@ from pathlib import Path from typing import Dict, List, Optional, Union +import click import pydantic from biocommons.seqrepo import SeqRepo from gffutils.feature import Feature @@ -34,6 +35,13 @@ class GeneNormalizerEtlError(Exception): } +DATA_DISPATCH = { + SourceName.HGNC: HgncData, + SourceName.ENSEMBL: EnsemblData, + SourceName.NCBI: NcbiGeneData, +} + + class Base(ABC): """The ETL base class.""" @@ -79,6 +87,8 @@ def perform_etl(self, use_existing: bool = False) -> List[str]: """ self._extract_data(use_existing) _logger.info(f"Transforming and loading {self._src_name} data to DB...") + if not self._silent: + click.echo("Transforming and loading data to DB...") self._add_meta() self._transform_data() self._database.complete_write_transaction() @@ -93,11 +103,9 @@ def _extract_data(self, use_existing: bool) -> None: :param bool use_existing: if True, don't try to fetch latest source data """ - _logger.info(f"Gathering {self._src_name} data...") self._data_file, self._version = self._data_source.get_latest( from_local=use_existing ) - _logger.info(f"Acquired data for {self._src_name}: {self._data_file}") @abstractmethod def _transform_data(self) -> None: