Skip to content

Commit

Permalink
Merge branch 'main' into merge
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Jan 2, 2024
2 parents 533af5c + 547f4a1 commit dc52928
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ repos:
- id: trailing-whitespace
- id: end-of-file-fixer
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.2
rev: v0.1.9
hooks:
- id: ruff
- id: ruff-format
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ Gene Normalizer
![PyPI - Python Version](https://img.shields.io/pypi/pyversions/gene-normalizer?color=gr) [![tests](https://github.com/cancervariants/gene-normalization/actions/workflows/github-actions.yml/badge.svg)](https://github.com/cancervariants/gene-normalization/actions/workflows/github-actions.yml) [![DOI](https://zenodo.org/badge/309797998.svg)](https://zenodo.org/badge/latestdoi/309797998)

## Overview

<!-- description -->
The Gene Normalizer provides tools for resolving ambiguous human gene references to consistently structured, normalized terms. For gene concepts extracted from [NCBI Gene](https://www.ncbi.nlm.nih.gov/gene/), [Ensembl](https://useast.ensembl.org/index.html), and [HGNC](https://www.genenames.org/), it designates a [CURIE](https://en.wikipedia.org/wiki/CURIE) and provides additional metadata like current and previously used symbols, aliases, database cross-references and associations, and coordinates.

<!-- /description -->
---

**[Live service](https://normalize.cancervariants.org/gene)**
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ dynamic = ["version"]
pg = ["psycopg[binary]"]
etl = ["gffutils", "biocommons.seqrepo", "wags-tails>=0.1.1"]
test = ["pytest>=6.0", "pytest-cov", "mock", "httpx"]
dev = ["pre-commit", "ruff>=0.1.2"]
dev = ["pre-commit", "ruff>=0.1.9"]
docs = [
"sphinx==6.1.3",
"sphinx-autodoc-typehints==1.22.0",
Expand Down
12 changes: 10 additions & 2 deletions src/gene/etl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pathlib import Path
from typing import Dict, List, Optional, Union

import click
import pydantic
from biocommons.seqrepo import SeqRepo
from gffutils.feature import Feature
Expand Down Expand Up @@ -34,6 +35,13 @@ class GeneNormalizerEtlError(Exception):
}


DATA_DISPATCH = {
SourceName.HGNC: HgncData,
SourceName.ENSEMBL: EnsemblData,
SourceName.NCBI: NcbiGeneData,
}


class Base(ABC):
"""The ETL base class."""

Expand Down Expand Up @@ -79,6 +87,8 @@ def perform_etl(self, use_existing: bool = False) -> List[str]:
"""
self._extract_data(use_existing)
_logger.info(f"Transforming and loading {self._src_name} data to DB...")
if not self._silent:
click.echo("Transforming and loading data to DB...")
self._add_meta()
self._transform_data()
self._database.complete_write_transaction()
Expand All @@ -93,11 +103,9 @@ def _extract_data(self, use_existing: bool) -> None:
:param bool use_existing: if True, don't try to fetch latest source data
"""
_logger.info(f"Gathering {self._src_name} data...")
self._data_file, self._version = self._data_source.get_latest(
from_local=use_existing
)
_logger.info(f"Acquired data for {self._src_name}: {self._data_file}")

@abstractmethod
def _transform_data(self) -> None:
Expand Down

0 comments on commit dc52928

Please sign in to comment.