Skip to content

Commit

Permalink
add primary_target_only to prob_escape (becomes version 1.4.3) (#84)
Browse files Browse the repository at this point in the history
* fix deprecation warning for `corr`

* use `altair` version 5 in tests

* add `primary_target_only` to `prob_escape`

* start testing on Python 3.11

* update version to 1.4.3

* `black` formatting and pass tests

* fix `primary_target_only` filtering in `prob_escape`

* add check in `prob_escape` that the neutralization standard target exists

* test on Python 3.10

* try testing on Python 3.8

* pass `flake8`
  • Loading branch information
jbloom authored Mar 19, 2023
1 parent 7974f4d commit 1432d62
Show file tree
Hide file tree
Showing 22 changed files with 491 additions and 437 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,17 @@ All notable changes to this project will be documented in this file.

The format is based on `Keep a Changelog <https://keepachangelog.com>`_.

1.4.3
-----

Added
++++++
- Added ``primary_target_only`` option to ``prob_escape``.

Changed
+++++++
- Use ``altair`` version 5.0.0rc1

1.4.2
------

Expand Down
2 changes: 1 addition & 1 deletion dms_variants/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@

__author__ = "`the Bloom lab <https://research.fhcrc.org/bloom/en.html>`_"
__email__ = "[email protected]"
__version__ = "1.4.2"
__version__ = "1.4.3"
__url__ = "https://github.com/jbloomlab/dms_variants"
23 changes: 19 additions & 4 deletions dms_variants/codonvarianttable.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,7 @@ def prob_escape(
min_neut_standard_count=1e3,
ceil_n_aa_substitutions=4,
drop_neut_standard_target=True,
primary_target_only=False,
):
r"""Compute probability of escape relative to a neutralization standard.
Expand Down Expand Up @@ -756,6 +757,9 @@ def prob_escape(
drop_neut_standard_target : bool
Drop the neutralization standard variant-level results from the
returned data frames.
primary_target_only : bool
Drop everything except the primary target and neutralization standard
target before beginning calculations.
Returns
-------
Expand Down Expand Up @@ -811,10 +815,18 @@ def prob_escape(
if len(invalid_samples):
raise ValueError(f"invalid samples in selections_df\n{invalid_samples}")

valid_targets = self.barcode_variant_df["target"].unique()
if neut_standard_target not in valid_targets:
raise ValueError(f"{neut_standard_target=} not in targets {valid_targets}")

# get neut_standard fracs for each library / sample
fracs = self.n_variants_df(primary_target_only=False)
if primary_target_only:
fracs = fracs.query(
"(target in [@self.primary_target, @neut_standard_target])"
)
fracs = (
self.n_variants_df(primary_target_only=False)
.assign(
fracs.assign(
n=lambda x: x.groupby(["library", "sample"])["count"].transform("sum"),
frac=lambda x: x["count"] / x["n"],
)
Expand Down Expand Up @@ -852,6 +864,10 @@ def prob_escape(

# get variant counts grouped by `by`
count_df = self.variant_count_df
if primary_target_only:
count_df = count_df.query(
"(target in [@self.primary_target, @neut_standard_target])"
)
group_cols = [
"codon_substitutions",
"n_codon_substitutions",
Expand Down Expand Up @@ -2146,7 +2162,7 @@ def plotCountsPerVariant(
if not classifyVariants_kwargs:
kw_args = {}
else:
kw_args = {k: v for k, v in classifyVariants_kwargs.items()}
kw_args = dict(classifyVariants_kwargs.items())
if "primary_target" not in kw_args:
kw_args["primary_target"] = self.primary_target
if "class_as_categorical" not in kw_args:
Expand Down Expand Up @@ -3025,7 +3041,6 @@ def _parseCodonMut(mutstr):
for lib, sample in itertools.product(
df["library"].unique().tolist(), df["sample"].unique().tolist()
):

i_df = df.query("library == @lib & sample == @sample")
if len(i_df) == 0:
continue # no data for this library and sample
Expand Down
1 change: 0 additions & 1 deletion dms_variants/fastq.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,6 @@ def iterate_fastq_pair(
)

for r1_entry, r2_entry in itertools.zip_longest(r1_iterator, r2_iterator):

if (r1_entry is None) or (r2_entry is None):
raise IOError(
f"{r1filename} and {r2filename} have unequal " "number of entries"
Expand Down
7 changes: 4 additions & 3 deletions dms_variants/globalepistasis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1017,9 +1017,7 @@ def phenotypes_frombinary(
assert latents.shape[0] == binary_variants.shape[1]
latent_phenos = binary_variants.dot(latents).transpose()
else:
latents = self._latenteffects.transpose()[
:-1,
]
latents = self._latenteffects.transpose()[:-1,]
assert latents.shape[0] == binary_variants.shape[1]
latent_phenos = (
binary_variants.dot(latents)
Expand Down Expand Up @@ -2223,6 +2221,7 @@ def __init__(
"increasing `pseudocount` if you have fitting "
"problems",
EpistasisFittingWarning,
stacklevel=2,
)
f.flags.writeable = False
setattr(self, f"_f_{cond}", f)
Expand Down Expand Up @@ -2891,6 +2890,7 @@ def _prescale_params(self, k, g_k_range):
f"({currentrange}); so cannot pre-scale. Just "
"setting all latent effects to zero",
EpistasisFittingWarning,
stacklevel=2,
)
rescaled_latenteffects[ki] = 0
rescaled_latenteffects[ki] = numpy.append(
Expand Down Expand Up @@ -2964,6 +2964,7 @@ def _postscale_params(self):
f"is nearly zero ({mean_abs_latent_effect}); "
"so cannot rescale",
EpistasisFittingWarning,
stacklevel=2,
)
else:
rescaled_latenteffects[ki] = (
Expand Down
1 change: 0 additions & 1 deletion dms_variants/illuminabarcodeparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,6 @@ def parse(self, r1files, *, r2files=None, add_cols=None):
)

for entry in iterator:

if r1only:
readlist = [entry[1]]
qlist = [entry[2]]
Expand Down
5 changes: 1 addition & 4 deletions dms_variants/simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def codon_muts(codonseq, nmuts, nvariants):
mutlist = []
for _ in range(nvariants):
sitemuts = []
for site, wt in sorted(random.sample(codons.items(), nmuts)):
for site, wt in sorted(random.sample(list(codons.items()), nmuts)):
mut = random.choice([c for c in CODONS if c != wt])
sitemuts.append(f"{wt}{site}{mut}")
mutlist.append(" ".join(sitemuts))
Expand Down Expand Up @@ -312,15 +312,13 @@ def random_sample(sites, n, p):

barcode_variant_dict = collections.defaultdict(list)
for lib, specs_dict in sorted(library_specs.items()):

nvariants = specs_dict["nvariants"]
avgmuts = specs_dict["avgmuts"]
if 10 * nvariants > (len(NTS)) ** bclen: # safety factor 10
raise ValueError("barcode too short for nvariants")
existing_barcodes = set()

for _ivariant in range(nvariants):

barcode = "".join(random.choices(NTS, k=bclen))
while barcode in existing_barcodes:
barcode = "".join(random.choices(NTS, k=bclen))
Expand Down Expand Up @@ -583,7 +581,6 @@ def _bottleneck_freqs(pre_freq, bottleneck):
for lib, (sample, sample_dict) in itertools.product( # noqa: B007
libraries, sorted(post_samples.items())
):

if set(sample_dict.keys()) != post_req_keys:
raise ValueError(f"post_samples {sample} lacks {post_req_keys}")

Expand Down
6 changes: 5 additions & 1 deletion dms_variants/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,11 @@ def tidy_to_corr(
if group_cols:
df = df.groupby(group_cols)

corr = df.corr(method=method).dropna(how="all", axis="index").reset_index()
corr = (
df.corr(method=method, numeric_only=True)
.dropna(how="all", axis="index")
.reset_index()
)

corr.columns.name = None # remove name of columns index

Expand Down
1 change: 0 additions & 1 deletion notebooks/bottleneck_likelihood.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,6 @@
"for N_bottle, p_v in itertools.product(\n",
" [5e4, 1e5, 5e5, 1e6, 5e6], [-0.5, -0.1, 0, 0.1, 0.5]\n",
"):\n",
"\n",
" # bottleneck log likelihood\n",
" n_v_bottle = f_post_v * N_bottle * mean_enrichment / 2**p_v\n",
" L_v = (\n",
Expand Down
22 changes: 21 additions & 1 deletion notebooks/codonvariant_plot_formatting.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,8 @@
}
],
"source": [
"# NBVAL_IGNORE_OUTPUT\n",
"\n",
"p = variants.plotNumCodonMutsByType(\"all\", samples=None)\n",
"_ = p.draw()"
]
Expand Down Expand Up @@ -392,6 +394,8 @@
}
],
"source": [
"# NBVAL_IGNORE_OUTPUT\n",
"\n",
"p = p + theme(panel_grid_major_x=element_blank()) # no vertical grid lines\n",
"_ = p.draw()"
]
Expand Down Expand Up @@ -426,6 +430,8 @@
}
],
"source": [
"# NBVAL_IGNORE_OUTPUT\n",
"\n",
"theme_set(theme_bw())\n",
"p = variants.plotNumCodonMutsByType(\"all\", samples=None)\n",
"_ = p.draw()"
Expand Down Expand Up @@ -464,6 +470,8 @@
}
],
"source": [
"# NBVAL_IGNORE_OUTPUT\n",
"\n",
"theme_set(theme_xkcd())\n",
"p = variants.plotNumCodonMutsByType(\n",
" \"all\", samples=None, heightscale=1.2, widthscale=1.2\n",
Expand Down Expand Up @@ -503,6 +511,8 @@
}
],
"source": [
"# NBVAL_IGNORE_OUTPUT\n",
"\n",
"theme_set(dms_variants.plotnine_themes.theme_graygrid()) # restore gray-grid theme\n",
"\n",
"p = variants.plotNumCodonMutsByType(\n",
Expand Down Expand Up @@ -544,6 +554,8 @@
}
],
"source": [
"# NBVAL_IGNORE_OUTPUT\n",
"\n",
"p = variants.plotNumCodonMutsByType(\n",
" \"all\",\n",
" samples=\"all\",\n",
Expand Down Expand Up @@ -580,6 +592,8 @@
}
],
"source": [
"# NBVAL_IGNORE_OUTPUT\n",
"\n",
"p = variants.plotNumCodonMutsByType(\n",
" \"all\",\n",
" samples=\"all\",\n",
Expand Down Expand Up @@ -616,6 +630,8 @@
}
],
"source": [
"# NBVAL_IGNORE_OUTPUT\n",
"\n",
"p = variants.plotNumCodonMutsByType(\n",
" \"all\",\n",
" samples=\"all\",\n",
Expand Down Expand Up @@ -653,6 +669,8 @@
}
],
"source": [
"# NBVAL_IGNORE_OUTPUT\n",
"\n",
"p = variants.plotNumCodonMutsByType(\n",
" \"all\",\n",
" samples=\"all\",\n",
Expand Down Expand Up @@ -690,6 +708,8 @@
}
],
"source": [
"# NBVAL_IGNORE_OUTPUT\n",
"\n",
"p = variants.plotNumMutsHistogram(\n",
" mut_type=\"codon\",\n",
" samples=[\"pre-selection\", \"tight_bottle\"],\n",
Expand Down Expand Up @@ -749,7 +769,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.11.0"
},
"toc": {
"nav_menu": {},
Expand Down
Loading

0 comments on commit 1432d62

Please sign in to comment.