Skip to content

Commit

Permalink
Merge pull request #33 from biocore/remove_setup.py
Browse files Browse the repository at this point in the history
Remove setup.py
  • Loading branch information
wasade authored Feb 13, 2025
2 parents ba9cd5b + 1d94de0 commit 32248c0
Show file tree
Hide file tree
Showing 16 changed files with 167 additions and 779 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ test:
pytest micov
bash cli_test.sh
lint:
ruff check micov setup.py
ruff check micov
check-manifest
3 changes: 0 additions & 3 deletions micov/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
"""micov: microbiome coverage."""

from . import _version

__version__ = _version.get_versions()["version"]
# note: currently for use with duckdb. we cannot easily enforce threads for polars
# as a specific environment variable must be set prior to the first import. it's
# doable but will need some engineering to do it correctly. And, polars does not
Expand Down
18 changes: 9 additions & 9 deletions micov/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(self):


class _BED_COV_SCHEMA(_SCHEMA):
dtypes_flat = [
dtypes_flat = [ # noqa: RUF012
(COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
(COLUMN_START, COLUMN_START_DTYPE),
(COLUMN_STOP, COLUMN_STOP_DTYPE),
Expand All @@ -44,7 +44,7 @@ class _BED_COV_SCHEMA(_SCHEMA):


class _BED_COV_WITH_SAMPLEID_SCHEMA(_SCHEMA):
dtypes_flat = [
dtypes_flat = [ # noqa: RUF012
(COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
(COLUMN_START, COLUMN_START_DTYPE),
(COLUMN_STOP, COLUMN_STOP_DTYPE),
Expand All @@ -60,8 +60,8 @@ class _SAM_SUBSET_SCHEMA(_SCHEMA):
# concerned about.
# for binary coverage, we don't care about the flag, but we're parsing it
# now so we can care in the future.
column_indices = [0, 1, 2, 3, 5]
dtypes_flat = [
column_indices = [0, 1, 2, 3, 5] # noqa: RUF012
dtypes_flat = [ # noqa: RUF012
(COLUMN_READ_ID, COLUMN_READ_ID_DTYPE),
(COLUMN_FLAG, COLUMN_FLAG_DTYPE),
(COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
Expand All @@ -74,7 +74,7 @@ class _SAM_SUBSET_SCHEMA(_SCHEMA):


class _SAM_SUBSET_SCHEMA_PARSED(_SCHEMA):
dtypes_flat = [
dtypes_flat = [ # noqa: RUF012
(COLUMN_READ_ID, COLUMN_READ_ID_DTYPE),
(COLUMN_FLAG, COLUMN_FLAG_DTYPE),
(COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
Expand All @@ -88,7 +88,7 @@ class _SAM_SUBSET_SCHEMA_PARSED(_SCHEMA):


class _GENOME_LENGTH_SCHEMA(_SCHEMA):
dtypes_flat = [
dtypes_flat = [ # noqa: RUF012
(COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
(COLUMN_LENGTH, COLUMN_LENGTH_DTYPE),
]
Expand All @@ -98,7 +98,7 @@ class _GENOME_LENGTH_SCHEMA(_SCHEMA):


class _GENOME_TAXONOMY_SCHEMA(_SCHEMA):
dtypes_flat = [
dtypes_flat = [ # noqa: RUF012
(COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
(COLUMN_TAXONOMY, COLUMN_TAXONOMY_DTYPE),
]
Expand All @@ -108,7 +108,7 @@ class _GENOME_TAXONOMY_SCHEMA(_SCHEMA):


class _GENOME_COVERAGE_SCHEMA(_SCHEMA):
dtypes_flat = [
dtypes_flat = [ # noqa: RUF012
(COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
(COLUMN_COVERED, COLUMN_COVERED_DTYPE),
(COLUMN_LENGTH, COLUMN_LENGTH_DTYPE),
Expand All @@ -120,7 +120,7 @@ class _GENOME_COVERAGE_SCHEMA(_SCHEMA):


class _GENOME_COVERAGE_WITH_SAMPLEID_SCHEMA(_SCHEMA):
dtypes_flat = [
dtypes_flat = [ # noqa: RUF012
(COLUMN_GENOME_ID, COLUMN_GENOME_ID_DTYPE),
(COLUMN_COVERED, COLUMN_COVERED_DTYPE),
(COLUMN_LENGTH, COLUMN_LENGTH_DTYPE),
Expand Down
27 changes: 14 additions & 13 deletions micov/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,16 @@ def parse_qiita_coverages(tgzs, *args, **kwargs):
Forwarded to _parse_qiita_coverages
"""
if not isinstance(tgzs, (list, tuple, set, frozenset)):
if not isinstance(tgzs, list | tuple | set | frozenset):
tgzs = [
tgzs,
]

compress_size = kwargs.get("compress_size", 50_000_000)

if compress_size is not None:
assert isinstance(compress_size, int) and compress_size >= 0
assert isinstance(compress_size, int)
assert compress_size >= 0
else:
compress_size = math.inf
kwargs["compress_size"] = compress_size
Expand Down Expand Up @@ -189,8 +190,8 @@ def _parse_qiita_coverages(

try:
fp.extractfile("coverage_percentage.txt")
except KeyError:
raise KeyError(f"{tgz} does not look like a Qiita coverage tgz")
except KeyError as e:
raise KeyError(f"{tgz} does not look like a Qiita coverage tgz") from e

if sample_keep is None:
sample_keep = SetOfAll()
Expand Down Expand Up @@ -558,24 +559,24 @@ def parse_coverage(data, features_to_keep):
return cov_df


def _first_col_as_set(fp):
df = pl.read_csv(fp, separator="\t", infer_schema_length=0)
return set(df[df.columns[0]])


def combine_pos_metadata_length(
sample_metadata,
length,
covered_positions,
features_to_keep):
sample_metadata, length, covered_positions, features_to_keep
):
df_md = parse_sample_metadata(sample_metadata).lazy()
df_length = parse_genome_lengths(length).lazy()
df_pos = pl.scan_parquet(covered_positions)

df_pos_md = df_pos.join(
df_md, on=COLUMN_SAMPLE_ID, how="left"
).join(
df_pos_md = df_pos.join(df_md, on=COLUMN_SAMPLE_ID, how="left").join(
df_length, on=COLUMN_GENOME_ID, how="left"
)

if features_to_keep:
features_to_keep = _first_col_as_set(features_to_keep)
df_pos_md = df_pos_md.filter(
pl.col(COLUMN_GENOME_ID).is_in(features_to_keep))
df_pos_md = df_pos_md.filter(pl.col(COLUMN_GENOME_ID).is_in(features_to_keep))

return df_pos_md
7 changes: 4 additions & 3 deletions micov/_per_sample.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from ._io import parse_qiita_coverages
from ._constants import COLUMN_SAMPLE_ID
from ._cov import coverage_percent, compress
import polars as pl

from ._constants import COLUMN_SAMPLE_ID
from ._cov import compress, coverage_percent
from ._io import parse_qiita_coverages


def per_sample_coverage(qiita_coverages, current_samples, features_to_keep,
features_to_ignore, lengths):
Expand Down
9 changes: 4 additions & 5 deletions micov/_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def add_monte(
monte_y = []
monte_x = list(range(max_x))

for it in range(iters):
for _ in range(iters):
monte = (sample_set.select(pl.col(COLUMN_SAMPLE_ID).shuffle()).head(max_x))[
COLUMN_SAMPLE_ID
]
Expand Down Expand Up @@ -460,7 +460,6 @@ def single_sample_position_plot(positions, lengths, output, scale=None):

ax.set_title(f"Position plot: {name}", fontsize=20)
ax.set_ylabel("Unit normalized position", fontsize=20)
scaletag = ""

ax.tick_params(axis="both", which="major", labelsize=16)
ax.tick_params(axis="both", which="minor", labelsize=16)
Expand Down Expand Up @@ -582,8 +581,8 @@ def position_plot(
hist_x = []
hist_y = []

col_selection = [COLUMN_SAMPLE_ID, COLUMN_GENOME_ID, "x_unscaled"]
for sid, gid, x in grp_coverage[col_selection].rows():
col_selection = [COLUMN_SAMPLE_ID, "x_unscaled"]
for sid, x in grp_coverage[col_selection].rows():
cur_positions = (
target_positions.filter(pl.col(COLUMN_SAMPLE_ID) == sid)
.join(grp_coverage.lazy(), on=COLUMN_SAMPLE_ID)
Expand Down Expand Up @@ -614,7 +613,7 @@ def position_plot(
)
obs_bins = obs_bins[:-1][obs_count > 0]
hist_x.extend([x for _ in obs_bins])
hist_y.extend([b for b in obs_bins])
hist_y.extend(obs_bins)

if scale is not None:
ax.scatter(hist_x, hist_y, s=0.2, color=color, alpha=0.7)
Expand Down
2 changes: 1 addition & 1 deletion micov/_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def pos_to_bins(pos, variable, bin_num):
bin_list = create_bin_list(genome_length, bin_num)

# get start_bin_idx and stop_bin_idx
bin_edges = [0.0] + bin_list.select(
bin_edges = [0.0] + bin_list.select( # noqa: RUF005
pl.col("bin_stop")
).collect().to_series().to_list()
cut_start = (
Expand Down
Loading

0 comments on commit 32248c0

Please sign in to comment.