Merge pull request #33 from biocore/remove_setup.py

Remove setup.py
biocore · Feb 13, 2025 · 32248c0 · 32248c0
2 parents ba9cd5b + 1d94de0
commit 32248c0
Show file tree

Hide file tree

Showing 16 changed files with 167 additions and 779 deletions.
diff --git a/Makefile b/Makefile
@@ -4,5 +4,5 @@ test:
 	pytest micov
 	bash cli_test.sh
 lint:
-	ruff check micov setup.py
+	ruff check micov
 	check-manifest
diff --git a/micov/__init__.py b/micov/__init__.py
@@ -1,8 +1,5 @@
 """micov: microbiome coverage."""
 
-from . import _version
-
-__version__ = _version.get_versions()["version"]
 # note: currently for use with duckdb. we cannot easily enforce threads for polars
 # as a specific environment variable must be set prior to the first import. it's
 # doable but will need some engineering to do it correctly. And, polars does not

diff --git a/micov/_constants.py b/micov/_constants.py
@@ -33,7 +33,7 @@ def __init__(self):
 
 
 class _BED_COV_SCHEMA(_SCHEMA):
-    dtypes_flat = [
+    dtypes_flat = [  # noqa: RUF012
         (COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
         (COLUMN_START, COLUMN_START_DTYPE),
         (COLUMN_STOP, COLUMN_STOP_DTYPE),
@@ -44,7 +44,7 @@ class _BED_COV_SCHEMA(_SCHEMA):
 
 
 class _BED_COV_WITH_SAMPLEID_SCHEMA(_SCHEMA):
-    dtypes_flat = [
+    dtypes_flat = [  # noqa: RUF012
         (COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
         (COLUMN_START, COLUMN_START_DTYPE),
         (COLUMN_STOP, COLUMN_STOP_DTYPE),
@@ -60,8 +60,8 @@ class _SAM_SUBSET_SCHEMA(_SCHEMA):
     # concerned about.
     # for binary coverage, we don't care about the flag, but we're parsing it
     # now so we can care in the future.
-    column_indices = [0, 1, 2, 3, 5]
-    dtypes_flat = [
+    column_indices = [0, 1, 2, 3, 5]  # noqa: RUF012
+    dtypes_flat = [  # noqa: RUF012
         (COLUMN_READ_ID, COLUMN_READ_ID_DTYPE),
         (COLUMN_FLAG, COLUMN_FLAG_DTYPE),
         (COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
@@ -74,7 +74,7 @@ class _SAM_SUBSET_SCHEMA(_SCHEMA):
 
 
 class _SAM_SUBSET_SCHEMA_PARSED(_SCHEMA):
-    dtypes_flat = [
+    dtypes_flat = [  # noqa: RUF012
         (COLUMN_READ_ID, COLUMN_READ_ID_DTYPE),
         (COLUMN_FLAG, COLUMN_FLAG_DTYPE),
         (COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
@@ -88,7 +88,7 @@ class _SAM_SUBSET_SCHEMA_PARSED(_SCHEMA):
 
 
 class _GENOME_LENGTH_SCHEMA(_SCHEMA):
-    dtypes_flat = [
+    dtypes_flat = [  # noqa: RUF012
         (COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
         (COLUMN_LENGTH, COLUMN_LENGTH_DTYPE),
     ]
@@ -98,7 +98,7 @@ class _GENOME_LENGTH_SCHEMA(_SCHEMA):
 
 
 class _GENOME_TAXONOMY_SCHEMA(_SCHEMA):
-    dtypes_flat = [
+    dtypes_flat = [  # noqa: RUF012
         (COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
         (COLUMN_TAXONOMY, COLUMN_TAXONOMY_DTYPE),
     ]
@@ -108,7 +108,7 @@ class _GENOME_TAXONOMY_SCHEMA(_SCHEMA):
 
 
 class _GENOME_COVERAGE_SCHEMA(_SCHEMA):
-    dtypes_flat = [
+    dtypes_flat = [  # noqa: RUF012
         (COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
         (COLUMN_COVERED, COLUMN_COVERED_DTYPE),
         (COLUMN_LENGTH, COLUMN_LENGTH_DTYPE),
@@ -120,7 +120,7 @@ class _GENOME_COVERAGE_SCHEMA(_SCHEMA):
 
 
 class _GENOME_COVERAGE_WITH_SAMPLEID_SCHEMA(_SCHEMA):
-    dtypes_flat = [
+    dtypes_flat = [  # noqa: RUF012
         (COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
         (COLUMN_COVERED, COLUMN_COVERED_DTYPE),
         (COLUMN_LENGTH, COLUMN_LENGTH_DTYPE),

diff --git a/micov/_io.py b/micov/_io.py
@@ -112,15 +112,16 @@ def parse_qiita_coverages(tgzs, *args, **kwargs):
         Forwarded to _parse_qiita_coverages
 
     """
-    if not isinstance(tgzs, (list, tuple, set, frozenset)):
+    if not isinstance(tgzs, list | tuple | set | frozenset):
         tgzs = [
             tgzs,
         ]
 
     compress_size = kwargs.get("compress_size", 50_000_000)
 
     if compress_size is not None:
-        assert isinstance(compress_size, int) and compress_size >= 0
+        assert isinstance(compress_size, int)
+        assert compress_size >= 0
     else:
         compress_size = math.inf
         kwargs["compress_size"] = compress_size
@@ -189,8 +190,8 @@ def _parse_qiita_coverages(
 
     try:
         fp.extractfile("coverage_percentage.txt")
-    except KeyError:
-        raise KeyError(f"{tgz} does not look like a Qiita coverage tgz")
+    except KeyError as e:
+        raise KeyError(f"{tgz} does not look like a Qiita coverage tgz") from e
 
     if sample_keep is None:
         sample_keep = SetOfAll()
@@ -558,24 +559,24 @@ def parse_coverage(data, features_to_keep):
     return cov_df
 
 
+def _first_col_as_set(fp):
+    df = pl.read_csv(fp, separator="\t", infer_schema_length=0)
+    return set(df[df.columns[0]])
+
+
 def combine_pos_metadata_length(
-    sample_metadata,
-    length,
-    covered_positions,
-    features_to_keep):
+    sample_metadata, length, covered_positions, features_to_keep
+):
     df_md = parse_sample_metadata(sample_metadata).lazy()
     df_length = parse_genome_lengths(length).lazy()
     df_pos = pl.scan_parquet(covered_positions)
 
-    df_pos_md = df_pos.join(
-        df_md, on=COLUMN_SAMPLE_ID, how="left"
-    ).join(
+    df_pos_md = df_pos.join(df_md, on=COLUMN_SAMPLE_ID, how="left").join(
         df_length, on=COLUMN_GENOME_ID, how="left"
     )
 
     if features_to_keep:
         features_to_keep = _first_col_as_set(features_to_keep)
-        df_pos_md = df_pos_md.filter(
-            pl.col(COLUMN_GENOME_ID).is_in(features_to_keep))
+        df_pos_md = df_pos_md.filter(pl.col(COLUMN_GENOME_ID).is_in(features_to_keep))
 
     return df_pos_md
diff --git a/micov/_per_sample.py b/micov/_per_sample.py
@@ -1,8 +1,9 @@
-from ._io import parse_qiita_coverages
-from ._constants import COLUMN_SAMPLE_ID
-from ._cov import coverage_percent, compress
 import polars as pl
 
+from ._constants import COLUMN_SAMPLE_ID
+from ._cov import compress, coverage_percent
+from ._io import parse_qiita_coverages
+
 
 def per_sample_coverage(qiita_coverages, current_samples, features_to_keep,
                         features_to_ignore, lengths):

diff --git a/micov/_plot.py b/micov/_plot.py
@@ -206,7 +206,7 @@ def add_monte(
     monte_y = []
     monte_x = list(range(max_x))
 
-    for it in range(iters):
+    for _ in range(iters):
         monte = (sample_set.select(pl.col(COLUMN_SAMPLE_ID).shuffle()).head(max_x))[
             COLUMN_SAMPLE_ID
         ]
@@ -460,7 +460,6 @@ def single_sample_position_plot(positions, lengths, output, scale=None):
 
         ax.set_title(f"Position plot: {name}", fontsize=20)
         ax.set_ylabel("Unit normalized position", fontsize=20)
-        scaletag = ""
 
         ax.tick_params(axis="both", which="major", labelsize=16)
         ax.tick_params(axis="both", which="minor", labelsize=16)
@@ -582,8 +581,8 @@ def position_plot(
         hist_x = []
         hist_y = []
 
-        col_selection = [COLUMN_SAMPLE_ID, COLUMN_GENOME_ID, "x_unscaled"]
-        for sid, gid, x in grp_coverage[col_selection].rows():
+        col_selection = [COLUMN_SAMPLE_ID, "x_unscaled"]
+        for sid, x in grp_coverage[col_selection].rows():
             cur_positions = (
                 target_positions.filter(pl.col(COLUMN_SAMPLE_ID) == sid)
                 .join(grp_coverage.lazy(), on=COLUMN_SAMPLE_ID)
@@ -614,7 +613,7 @@ def position_plot(
                 )
                 obs_bins = obs_bins[:-1][obs_count > 0]
                 hist_x.extend([x for _ in obs_bins])
-                hist_y.extend([b for b in obs_bins])
+                hist_y.extend(obs_bins)
 
         if scale is not None:
             ax.scatter(hist_x, hist_y, s=0.2, color=color, alpha=0.7)

diff --git a/micov/_quant.py b/micov/_quant.py
@@ -57,7 +57,7 @@ def pos_to_bins(pos, variable, bin_num):
     bin_list = create_bin_list(genome_length, bin_num)
 
     # get start_bin_idx and stop_bin_idx
-    bin_edges = [0.0] + bin_list.select(
+    bin_edges = [0.0] + bin_list.select(  # noqa: RUF005
         pl.col("bin_stop")
     ).collect().to_series().to_list()
     cut_start = (