From b7c0d2070929846c19f3b9a1d0cc0074f8d583b4 Mon Sep 17 00:00:00 2001 From: Chris Rosenthal Date: Tue, 25 Jun 2024 16:14:15 -0700 Subject: [PATCH] cleaned up some pandas deprecation --- deenurp/subcommands/filter_outliers.py | 3 +-- deenurp/test/test_outliers.py | 3 ++- deenurp/wrap.py | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/deenurp/subcommands/filter_outliers.py b/deenurp/subcommands/filter_outliers.py index 0da6d2c..55519ad 100644 --- a/deenurp/subcommands/filter_outliers.py +++ b/deenurp/subcommands/filter_outliers.py @@ -279,8 +279,7 @@ def parse_usearch_allpairs(filename, seqnames): # for each sequence pair, select the longest alignment if there is # more than one (chooses first occurrence if there are two the same # length). - maxidx = data.groupby(['query', 'target']).apply( - lambda x: x['align_len'].idxmax()) + maxidx = data.groupby(['query', 'target'])['align_len'].idxmax() data = data.iloc[maxidx] if set(seqnames) != set(data['query']) | set(data['target']): diff --git a/deenurp/test/test_outliers.py b/deenurp/test/test_outliers.py index ab8efc6..acb8ad4 100644 --- a/deenurp/test/test_outliers.py +++ b/deenurp/test/test_outliers.py @@ -1,3 +1,4 @@ +import io import os import unittest @@ -95,7 +96,7 @@ def test_choose_clusters(self): "medoid":{"0":238.0,"1":null,"2":284.0}, "dist":{"0":0.0,"1":null,"2":0.089}}""" - df = pd.read_json(s) + df = pd.read_json(io.StringIO(s)) # output is a set of cluster names (not indices) self.assertSetEqual(set(outliers.choose_clusters(df, 2, 0.015)), {0}) self.assertSetEqual(set(outliers.choose_clusters(df, 2, 0.1)), {0, 1}) diff --git a/deenurp/wrap.py b/deenurp/wrap.py index aaf90ec..16efc84 100644 --- a/deenurp/wrap.py +++ b/deenurp/wrap.py @@ -9,7 +9,7 @@ import os.path import subprocess import re -from distutils.version import LooseVersion +from packaging.version import Version from io import StringIO import pandas as pd @@ -227,10 +227,10 @@ def cmalign_scores(text): return 
pd.read_csv( StringIO(text), comment="#", - delim_whitespace=True, dtype=dtypes, index_col='seq_name', - names=dtypes.keys() + names=dtypes.keys(), + sep='\s+' ) @@ -281,7 +281,7 @@ def _require_vsearch_version(vsearch=VSEARCH, version=VSEARCH_VERSION): vsearch = re.search(r'^vsearch v(?P<vstr>\d+\.\d+\.[^_]+)', output.stderr) ver = vsearch.groupdict()['vstr'] - if LooseVersion(ver) < LooseVersion(version): + if Version(ver) < Version(version): raise MissingDependencyError( 'vsearch version >= v{} is required, got v{}'.format(version, ver))