Skip to content

Commit

Permalink
update to use newer versions of dependencies (#87)
Browse files Browse the repository at this point in the history
* remove deprecated `scipy` functions in favor of `numpy` ones

* re-format with latest version of `black`

* lint with `ruff`

* add `pyarrow` dependency as required by new `pandas`

* pass tests with new versions of dependencies

* test via GitHub actions, not Travis

* remove Travis test file
  • Loading branch information
jbloom authored Feb 6, 2024
1 parent 1432d62 commit f5e0a8f
Show file tree
Hide file tree
Showing 35 changed files with 1,595 additions and 682 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
name: Run tests

on:
push:
branches:
- master
pull_request:
branches:
- master

jobs:
test:
name: Run tests
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- name: checkout
uses: actions/checkout@v4

- name: install python
uses: actions/setup-python@v5
with:
python-version: "3.11"

- name: install package and dependencies
run: pip install -e . && pip install -r test_requirements.txt

- name: lint code with ruff
run: ruff check .

- name: check code format with black
run: black --check .

- name: test code with `pytest`
run: pytest
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
_*

!.gitignore
!.github
!.travis.yml
!.flake8
!.nojekyll
Expand Down
25 changes: 0 additions & 25 deletions .travis.yml

This file was deleted.

9 changes: 9 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@ All notable changes to this project will be documented in this file.

The format is based on `Keep a Changelog <https://keepachangelog.com>`_.

1.5.0
-----
- Remove use of deprecated ``scipy`` functions like ``flip`` to use ``numpy`` alternatives instead (fixes `this issue <https://github.com/jbloomlab/dms_variants/issues/86>`_).
- Re-format code with latest version of ``black``.
- Lint with ``ruff`` rather than ``flake8``.
- Add ``pyarrow`` as dependency as required by ``pandas``.
- Tweaks to work with new versions of ``pandas`` and ``plotnine``.
- Test with GitHub Actions rather than Travis CI.

1.4.3
-----

Expand Down
10 changes: 8 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,14 @@ dms_variants
.. image:: https://img.shields.io/pypi/v/dms_variants.svg
:target: https://pypi.python.org/pypi/dms_variants

.. image:: https://app.travis-ci.com/jbloomlab/dms_variants.svg
:target: https://app.travis-ci.com/github/jbloomlab/dms_variants
.. image:: https://github.com/jbloomlab/dms_variants/actions/workflows/test.yaml/badge.svg
:target: https://github.com/jbloomlab/dms_variants/actions/workflows/test.yaml

.. image:: https://img.shields.io/badge/code%20style-black-000000.svg
:target: https://github.com/psf/black

.. image:: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json
:target: https://github.com/astral-sh/ruff

.. image:: https://mybinder.org/badge_logo.svg
:target: https://mybinder.org/v2/gh/jbloomlab/dms_variants/master?filepath=notebooks
Expand Down
2 changes: 1 addition & 1 deletion dms_variants/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@

__author__ = "`the Bloom lab <https://research.fhcrc.org/bloom/en.html>`_"
__email__ = "[email protected]"
__version__ = "1.4.3"
__version__ = "1.5.0"
__url__ = "https://github.com/jbloomlab/dms_variants"
1 change: 0 additions & 1 deletion dms_variants/bottlenecks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
"""


import numpy

import scipy.optimize
Expand Down
14 changes: 8 additions & 6 deletions dms_variants/codonvarianttable.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,12 +222,12 @@ def from_variant_count_df(
if "target" in set(df.columns):
raise ValueError('primary_target is None but "target" col')

if not set(req_cols).issubset((df.columns)):
if not set(req_cols).issubset(df.columns):
raise ValueError(
f"{variant_count_df_file} lacks required "
f"columns {req_cols}. It has: {set(df.columns)}"
)
if extra_cols and not set(extra_cols).issubset((df.columns)):
if extra_cols and not set(extra_cols).issubset(df.columns):
raise ValueError(
f"{variant_count_df_file} lacks `extra_cols` "
f"columns {extra_cols}. Has: {set(df.columns)}"
Expand Down Expand Up @@ -827,7 +827,9 @@ def prob_escape(
)
fracs = (
fracs.assign(
n=lambda x: x.groupby(["library", "sample"])["count"].transform("sum"),
n=lambda x: x.groupby(["library", "sample"], observed=False)[
"count"
].transform("sum"),
frac=lambda x: x["count"] / x["n"],
)
.query("target == @neut_standard_target")
Expand Down Expand Up @@ -1101,7 +1103,7 @@ def escape_scores(
or :math:`B_v`.
Parameters
-----------
----------
sample_df : pandas.DataFrame
Comparisons we use to compute the functional scores. Should have
these columns: 'pre_sample' (pre-selection sample), 'post_sample'
Expand Down Expand Up @@ -2090,7 +2092,7 @@ def plotCountsPerVariant(
"""Plot variant index versus counts (or frac counts).
Parameters
-----------
----------
ystat : {'frac_counts', 'count'}
Is y-axis counts from variant, or fraction of counts in
library / sample from variant?
Expand Down Expand Up @@ -2610,7 +2612,7 @@ def plotNumCodonMutsByType(
)
+ p9.theme(
figure_size=(width, height),
axis_title_x=p9.element_blank(),
axis_title_x=None,
axis_text_x=p9.element_text(angle=90),
legend_position="none",
)
Expand Down
1 change: 0 additions & 1 deletion dms_variants/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
"""


import Bio.Data.IUPACData
import Bio.Seq

Expand Down
11 changes: 5 additions & 6 deletions dms_variants/fastq.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
"""


import collections
import gzip
import itertools
Expand Down Expand Up @@ -148,12 +147,12 @@ def iterate_fastq_pair(

for r1_entry, r2_entry in itertools.zip_longest(r1_iterator, r2_iterator):
if (r1_entry is None) or (r2_entry is None):
raise IOError(
raise OSError(
f"{r1filename} and {r2filename} have unequal " "number of entries"
)

if r1_entry[0] != r2_entry[0]:
raise IOError(
raise OSError(
f"{r1filename} and {r2filename} specify different "
f"read IDs:\n{r1_entry[0]}\n{r2_entry[0]}"
)
Expand Down Expand Up @@ -255,7 +254,7 @@ def iterate_fastq(filename, *, trim=None, check_pair=None, qual_format="str"):
raise ValueError(f"invalid `check_pair` of {check_pair}")

if not os.path.isfile(filename):
raise IOError(f"no FASTQ file {filename}")
raise OSError(f"no FASTQ file {filename}")

if qual_format == "array":
qual_to_array = True
Expand All @@ -273,7 +272,7 @@ def iterate_fastq(filename, *, trim=None, check_pair=None, qual_format="str"):
head = f.readline()
while head:
if head[0] != "@":
raise IOError(f"id starts with {head[0]}, not @:\n{head}")
raise OSError(f"id starts with {head[0]}, not @:\n{head}")
else:
head = head.rstrip()
headspl = head[1:].split()
Expand All @@ -282,7 +281,7 @@ def iterate_fastq(filename, *, trim=None, check_pair=None, qual_format="str"):
plusline = f.readline().rstrip()
qs = f.readline().rstrip()
if (not seq) or (len(seq) != len(qs)) or (plusline != "+"):
raise IOError(
raise OSError(
f"invalid entry for {read_id} in {filename}:\n"
f"{head}\n{seq}\n{plusline}\n{qs}"
)
Expand Down
33 changes: 15 additions & 18 deletions dms_variants/globalepistasis.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,6 @@
"""


import abc
import collections
import re
Expand Down Expand Up @@ -742,9 +741,9 @@ def _set_lower_latent_phenotype_params(self, model_one_less_latent):
for k in range(1, self.n_latent_phenotypes):
ki = k - 1
new_latenteffects[ki] = model_one_less_latent._latenteffects[ki]
new_epistasis_func_params[
ki
] = model_one_less_latent._epistasis_func_params[ki]
new_epistasis_func_params[ki] = (
model_one_less_latent._epistasis_func_params[ki]
)
self._latenteffects = new_latenteffects
self._epistasis_func_params = new_epistasis_func_params

Expand Down Expand Up @@ -910,7 +909,7 @@ def latent_phenotype_wt(self, k=None):
is just one latent phenotype, can also be `None`.
Returns
---------
-------
float
Wildtype latent phenotype, which is :math:`\beta_{\rm{wt}}` in
Eq. :eq:`latent_phenotype` or :math:`\beta_{\rm{wt}}^k` in
Expand Down Expand Up @@ -990,7 +989,7 @@ def phenotypes_frombinary(
if `phenotype` is 'observed'.
Returns
--------
-------
numpy.ndarray
Latent phenotypes calculated using Eq. :eq:`latent_phenotype` or
observed phenotypes calculated using Eq. :eq:`observed_phenotype`
Expand Down Expand Up @@ -1308,7 +1307,7 @@ def single_mut_effects(
reported only for mutations present in `AbstractEpistasis.binarymap`.
Parameters
-----------
----------
phenotype : {'latent', 'observed'}
Get effect on this phenotype. If there are multiple latent
phenotypes, you must also set `k`.
Expand Down Expand Up @@ -1564,7 +1563,7 @@ def _dloglik_by_allparams(self, allparams, negative=True):
optimize.
Returns
--------
-------
numpy.ndarray
(Negative) derivative of log likelihood with respect to
:meth:`AbstractEpistasis._allparams`.
Expand Down Expand Up @@ -1660,7 +1659,7 @@ def _latent_phenotypes(self, k=None):
"""Latent phenotypes.
Parameters
-----------
----------
k : int or None
Latent phenotype number (1 <= `k` <= `n_latent_phenotypes`),
or can be `None` if just one latent phenotype.
Expand Down Expand Up @@ -1696,7 +1695,7 @@ def _observed_phenotypes(self, latent_phenos="all"):
:math:`k` values listed here.
Returns
--------
-------
numpy.ndarray
Observed phenotypes.
Expand Down Expand Up @@ -1835,7 +1834,7 @@ def epistasis_func(self, latent_phenotype, k=None):
"""The :ref:`global_epistasis_function` :math:`g`.
Parameters
-----------
----------
latent_phenotype : numpy.ndarray
Latent phenotype(s) of one or more variants.
k : int or None
Expand All @@ -1857,7 +1856,7 @@ def _depistasis_func_dlatent(self, latent_phenotype, k=None):
"""Derivative of epistasis function by latent phenotype.
Parameters
-----------
----------
latent_phenotype : numpy.ndarray
Latent phenotype(s) of one or more variants.
k : int or None
Expand Down Expand Up @@ -1933,7 +1932,7 @@ def _prescale_params(self, k, g_k_range):
for `_epistasis_func_params`.
Parameters
-----------
----------
k : int
Latent phenotype number (1 <= `k` <= `n_latent_phenotypes`).
g_k_range : tuple
Expand Down Expand Up @@ -2495,9 +2494,7 @@ def _dloglik_dlikelihood_calc_params(self):
self._cache[key] = numpy.array(
[
0.5
* (
self._dloglik_dobserved_phenotype**2 - 1 / self._variances
).sum()
* (self._dloglik_dobserved_phenotype**2 - 1 / self._variances).sum()
]
)
self._cache[key].flags.writeable = False
Expand Down Expand Up @@ -2673,14 +2670,14 @@ def _isplines_total(self, k=None):
"""I-splines for global epistasis function.
Parameters
-----------
----------
k : int or None
Which global epistasis function to get I-splines for (1 <= k <=
:attr:`AbstractEpistasis.n_latent_phenotypes`). If there
is just one latent phenotype, can also be `None`.
Returns
--------
-------
:class:`dms_variants.ispline.Isplines_total`
The I-spline family defined with the current values of
the latent phenotypes as `x`.
Expand Down
10 changes: 5 additions & 5 deletions dms_variants/illuminabarcodeparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@

import collections

import numpy

import pandas as pd

import regex

import scipy

from dms_variants.fastq import (
iterate_fastq,
iterate_fastq_pair,
Expand Down Expand Up @@ -290,11 +290,11 @@ def parse(self, r1files, *, r2files=None, add_cols=None):
if self.bc_orientation == "R1":
if not r1only:
bc["R2"] = reverse_complement(bc["R2"])
bc_q["R2"] = scipy.flip(bc_q["R2"], axis=0)
bc_q["R2"] = numpy.flip(bc_q["R2"], axis=0)
else:
assert self.bc_orientation == "R2"
bc["R1"] = reverse_complement(bc["R1"])
bc_q["R1"] = scipy.flip(bc_q["R1"], axis=0)
bc_q["R1"] = numpy.flip(bc_q["R1"], axis=0)
if r1only:
if (bc_q["R1"] >= self.minq).all():
if self.valid_barcodes and (
Expand All @@ -313,7 +313,7 @@ def parse(self, r1files, *, r2files=None, add_cols=None):
):
fates["invalid barcode"] += 1
elif (
scipy.maximum(bc_q["R1"], bc_q["R2"]) >= self.minq
numpy.maximum(bc_q["R1"], bc_q["R2"]) >= self.minq
).all():
barcodes[bc["R1"]] += 1
fates["valid barcode"] += 1
Expand Down
Loading

0 comments on commit f5e0a8f

Please sign in to comment.