Skip to content

Commit 7eba519

Browse files
authored
Merge pull request #43 from Robaina/42-pyopensci-review-minor-changes
PyOpenSci REVIEW - minor updates
2 parents b2d6595 + 8af7667 commit 7eba519

17 files changed

+152
-155
lines changed

pynteny/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
from pynteny.api import *
1+
from pynteny.api import Command, Search, Build, Download
22
from pynteny.cli import main

pynteny/api.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,11 @@
2424

2525

2626
class Command:
27-
"""Parent class for Pynteny command"""
27+
"""
28+
Parent class for Pynteny command
2829
29-
def __init__(self):
30-
"""Parent class for Pynteny command"""
30+
args: CommandArgs
31+
"""
3132

3233
def _repr_html_(self):
3334
"""Executed by Jupyter to print Author and version in html"""

pynteny/app/helpers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from pynteny.utils import ConfigParser
1313

1414

15-
parent_dir = Path(Path(__file__).parent)
15+
parent_dir = Path(__file__).parent
1616

1717

1818
class FileManager:

pynteny/app/main_page.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from pynteny.app.components import Sidebar, Mainpage
1010

1111

12-
parent_dir = Path(Path(__file__).parent)
12+
parent_dir = Path(__file__).parent
1313
meta = metadata.metadata("pynteny")
1414
__version__ = meta["Version"]
1515
__author__ = meta["Author"]

pynteny/cli.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def _generate_cool_quotes(self):
8989
"One does not simply walk into Mordor (J.R.R. Tolkien)",
9090
"Damn, looks like a rainy day, let's do bioiformatics! (SR)",
9191
]
92-
return f"{random.choice(quotes)}\n" " "
92+
return f"{random.choice(quotes)}\n"
9393

9494
def _call_subcommand(self, subcommand_name: str) -> None:
9595
subcommand = getattr(self, subcommand_name)
@@ -182,16 +182,16 @@ def search() -> argparse.ArgumentParser:
182182
type=str,
183183
required=True,
184184
help=(
185-
f"string displaying hmm structure to search for, such as: \n"
186-
f" \n"
187-
f"'>hmm_a n_ab <hmm_b n_bc hmm_c'\n"
188-
f" \n"
189-
f"where '>' indicates a hmm target located on the positive strand, \n"
190-
f"'<' a target located on the negative strand, and n_ab cooresponds \n"
191-
f"to the maximum number of genes separating matched genes a and b. \n"
192-
f"Multiple hmms may be employed. \n"
193-
f"No order symbol in a hmm indicates that results should be independent \n"
194-
f"of strand location. "
185+
"string displaying hmm structure to search for, such as: \n"
186+
" \n"
187+
"'>hmm_a n_ab <hmm_b n_bc hmm_c'\n"
188+
" \n"
189+
"where '>' indicates a hmm target located on the positive strand, \n"
190+
"'<' a target located on the negative strand, and n_ab cooresponds \n"
191+
"to the maximum number of genes separating matched genes a and b. \n"
192+
"Multiple hmms may be employed. \n"
193+
"No order symbol in a hmm indicates that results should be independent \n"
194+
"of strand location. "
195195
),
196196
)
197197
required.add_argument(
@@ -458,7 +458,7 @@ def download() -> argparse.ArgumentParser:
458458
)
459459

460460
optional = parser._action_groups.pop()
461-
required = parser.add_argument_group("required arguments")
461+
# required = parser.add_argument_group("required arguments")
462462
parser._action_groups.append(optional)
463463

464464
optional.add_argument(

pynteny/filter.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -38,20 +38,14 @@ def __init__(self, synteny_structure: str, unordered: bool = False) -> None:
3838
gene found by hmm_a and gene found by hmm_b, and hmm_ corresponds
3939
to the name of the hmm as provided in the keys of hmm_hits.
4040
More than two hmms can be concatenated. Strand location may be
41-
specificed by using '>' for sense and '<' for antisense.
41+
specified by using '>' for sense and '<' for antisense.
4242
unordered (bool, optional): whether the HMMs should be arranged in the
4343
exact same order displayed in the synteny_structure or in
44-
any order If ordered, the filters would filter collinear rather
44+
any order. If ordered, the filters would filter collinear rather
4545
than syntenic structures. Defaults to False.
4646
"""
4747
parsed_structure = SyntenyParser.parse_synteny_structure(synteny_structure)
48-
hmm_order_dict = dict(
49-
zip(
50-
parsed_structure["hmm_groups"],
51-
range(len(parsed_structure["hmm_groups"])),
52-
)
53-
)
54-
hmm_codes = list(hmm_order_dict.values())
48+
hmm_codes = list(range(len(parsed_structure["hmm_groups"])))
5549
self.hmm_code_order_pattern = hmm_codes
5650

5751
if unordered:
@@ -128,7 +122,7 @@ def contains_strand_pattern(self, data: pd.Series) -> int:
128122
strand_comparisons.append(data_strand == pattern_strand)
129123
else:
130124
strand_comparisons.append(True)
131-
return 1 if all(strand_comparisons) == True else 0
125+
return 1 if all(strand_comparisons) else 0
132126

133127

134128
class SyntenyHMMfilter:
@@ -257,10 +251,10 @@ def get_all_HMM_hits(self) -> pd.DataFrame:
257251
.filter(lambda x: len(x) >= self._n_hmm_groups)
258252
.sort_values(["contig", "gene_pos"], ascending=True)
259253
)
260-
all_hit_labels.reset_index(drop=True, inplace=True)
254+
all_hit_labels = all_hit_labels.reset_index(drop=True)
261255
if self._contains_hmm_groups:
262256
all_hit_labels = self._merge_hits_by_HMM_group(all_hit_labels)
263-
all_hit_labels.reset_index(drop=True, inplace=True)
257+
all_hit_labels = all_hit_labels.reset_index(drop=True)
264258
return self._add_meta_codes_to_HMM_hits(all_hit_labels)
265259

266260
def filter_hits_by_synteny_structure(self) -> dict:
@@ -296,7 +290,7 @@ def filter_hits_by_synteny_structure(self) -> dict:
296290
hmm_group: [] for hmm_group in contig_hits.hmm.unique()
297291
}
298292

299-
if len(contig_hits.hmm.unique()) >= self._n_hmm_groups:
293+
if contig_hits.hmm.nunique() >= self._n_hmm_groups:
300294

301295
hmm_match = contig_hits.hmm_code.rolling(
302296
window=self._n_hmm_groups
@@ -313,7 +307,7 @@ def filter_hits_by_synteny_structure(self) -> dict:
313307
]
314308
else:
315309
matched_rows = contig_hits[(hmm_match == 1) & (strand_match == 1)]
316-
for i, _ in matched_rows.iterrows():
310+
for i in matched_rows.index:
317311
matched_hits = contig_hits.iloc[
318312
i - (self._n_hmm_groups - 1) : i + 1, :
319313
]
@@ -379,7 +373,8 @@ def from_hits_dict(cls, hits_by_contig: dict) -> SyntenyHits:
379373
"""
380374
return cls(cls._hits_to_dataframe(hits_by_contig))
381375

382-
def get_synteny_hits(self) -> pd.DataFrame:
376+
@property
377+
def hits(self) -> pd.DataFrame:
383378
"""Return synteny hits.
384379
385380
Returns:
@@ -401,14 +396,17 @@ def add_HMM_meta_info_to_hits(self, hmm_meta: Path) -> SyntenyHits:
401396
return self._synteny_hits
402397
pgap = PGAP(hmm_meta)
403398
self._synteny_hits[fields] = ""
404-
for i, row in self._synteny_hits.iterrows():
399+
# for i, row in self._synteny_hits.iterrows():
400+
for row in self._synteny_hits.itertuples():
401+
i = getattr(row, "Index")
402+
hmm_group = getattr(row, "hmm")
405403
meta_values = [
406404
[
407405
str(v).replace("nan", "")
408406
for k, v in pgap.get_meta_info_for_HMM(hmm).items()
409407
if k != "#ncbi_accession"
410408
]
411-
for hmm in row.hmm.split("|")
409+
for hmm in hmm_group.split("|") # row.hmm.split("|")
412410
]
413411
self._synteny_hits.loc[i, fields] = ["|".join(v) for v in zip(*meta_values)]
414412
return SyntenyHits(self._synteny_hits)
@@ -532,11 +530,13 @@ def filter_FASTA_by_synteny_structure(
532530
if additional_args is None:
533531
additional_args = [None for _ in input_hmms]
534532

535-
if type(additional_args) == str:
533+
# if type(additional_args) == str:
534+
if isinstance(additional_args, str):
536535
logger.warning(f"Repeating hmmsearch arg: '{additional_args}' for all HMMs")
537536
additional_args = [additional_args for _ in input_hmms]
538537

539-
elif type(additional_args) == list:
538+
# elif type(additional_args) == list:
539+
elif isinstance(additional_args, list):
540540
if len(additional_args) == 1:
541541
logger.warning(
542542
f"Repeating hmmsearch arg: '{additional_args[0]}' for all HMMs"

pynteny/hmm.py

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,8 @@ def get_HMMER_tables(
103103
hmm_hits = {}
104104
for hmm_model, add_args in zip(self._input_hmms, self._additional_args):
105105
hmm_name = hmm_model.stem
106-
hmmer_output = Path(
107-
os.path.join(self._hmmer_output_dir, f"hmmer_output_{hmm_name}.txt")
108-
)
106+
hmmer_output = Path(self._hmmer_output_dir) / f"hmmer_output_{hmm_name}.txt"
107+
109108
if not (reuse_hmmer_results and os.path.isfile(hmmer_output)):
110109
wrappers.run_HMM_search(
111110
hmm_model=hmm_model,
@@ -130,10 +129,20 @@ def __init__(self, meta_file: Path):
130129
Args:
131130
meta_file (Path): path to PGAP's metadata file.
132131
"""
133-
meta = pd.read_csv(str(meta_file), sep="\t")
134-
meta = meta[
135-
["#ncbi_accession", "gene_symbol", "label", "product_name", "ec_numbers"]
136-
]
132+
meta = pd.read_csv(
133+
str(meta_file),
134+
sep="\t",
135+
usecols=[
136+
"#ncbi_accession",
137+
"gene_symbol",
138+
"label",
139+
"product_name",
140+
"ec_numbers",
141+
],
142+
)
143+
# meta = meta[
144+
# ["#ncbi_accession", "gene_symbol", "label", "product_name", "ec_numbers"]
145+
# ]
137146
self._meta = meta
138147
self._meta_file = meta_file
139148

@@ -189,17 +198,14 @@ def get_HMM_names_by_gene_symbol(self, gene_symbol: str) -> list[str]:
189198
list[str]: list of HMM names matching gene symbol.
190199
"""
191200
meta = self._meta # .dropna(subset=["gene_symbol", "label"], axis=0)
192-
try:
193-
return meta[
194-
(
195-
(meta.gene_symbol == gene_symbol)
196-
|
197-
# (meta.label.str.contains(gene_id))
198-
(meta.label == gene_symbol)
199-
)
200-
]["#ncbi_accession"].values.tolist()
201-
except:
202-
return list()
201+
return meta[
202+
(
203+
(meta.gene_symbol == gene_symbol)
204+
|
205+
# (meta.label.str.contains(gene_id))
206+
(meta.label == gene_symbol)
207+
)
208+
]["#ncbi_accession"].values.tolist()
203209

204210
def get_HMM_group_for_gene_symbol(self, gene_symbol: str) -> str:
205211
"""Get HMMs corresponding to gene symbol in PGAP metadata.
@@ -230,12 +236,7 @@ def get_HMM_gene_ID(self, hmm_name: str) -> list[str]:
230236
list[str]: list of gene symbols matching given HMM.
231237
"""
232238
meta = self._meta.dropna(subset=["#ncbi_accession"], axis=0)
233-
try:
234-
return meta[meta["#ncbi_accession"] == hmm_name][
235-
"gene_symbol"
236-
].values.tolist()
237-
except:
238-
return None
239+
return meta[meta["#ncbi_accession"] == hmm_name]["gene_symbol"].values.tolist()
239240

240241
def get_meta_info_for_HMM(self, hmm_name: str) -> dict:
241242
"""Get meta info for given hmm.
@@ -249,11 +250,10 @@ def get_meta_info_for_HMM(self, hmm_name: str) -> dict:
249250
meta = self._meta.dropna(subset=["#ncbi_accession"], axis=0).applymap(
250251
lambda x: x if not pd.isna(x) else ""
251252
)
252-
try:
253-
return {
254-
k: list(v.values())[0] if list(v.values())[0] else "undef"
255-
for k, v in meta[meta["#ncbi_accession"] == hmm_name].to_dict().items()
256-
}
257-
except:
253+
metadata = {
254+
k: list(v.values())[0] if list(v.values())[0] else "undef"
255+
for k, v in meta[meta["#ncbi_accession"] == hmm_name].to_dict().items()
256+
}
257+
if not metadata:
258258
logger.warning(f"No metadata for HMM: {hmm_name}")
259-
return dict()
259+
return metadata

pynteny/parser.py

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -39,21 +39,26 @@ def parse(label: str) -> dict:
3939
"locus_pos": None,
4040
"strand": "",
4141
}
42-
try:
43-
entry = label.split("__")[0]
44-
meta = label.split("__")[1]
45-
strand = meta.split("_")[-1]
46-
locus_pos = tuple([int(pos) for pos in meta.split("_")[-3:-1]])
47-
gene_pos = int(meta.split("_")[-4])
48-
contig = "_".join(meta.split("_")[:-4])
49-
50-
parsed_dict["gene_id"] = entry
51-
parsed_dict["contig"] = contig
52-
parsed_dict["gene_pos"] = gene_pos
53-
parsed_dict["locus_pos"] = locus_pos
54-
parsed_dict["strand"] = strand
55-
except Exception:
56-
pass
42+
43+
if label.count("__") > 1:
44+
logger.error("Invalid format of record label string")
45+
sys.exit(1)
46+
47+
entry = label.split("__")[0]
48+
meta = label.split("__")[1]
49+
meta_items = meta.split("_")
50+
51+
strand = meta_items[-1]
52+
locus_pos = tuple([int(pos) for pos in meta_items[-3:-1]])
53+
gene_pos = int(meta_items[-4])
54+
contig = "_".join(meta_items[:-4])
55+
56+
parsed_dict["gene_id"] = entry
57+
parsed_dict["contig"] = contig
58+
parsed_dict["gene_pos"] = gene_pos
59+
parsed_dict["locus_pos"] = locus_pos
60+
parsed_dict["strand"] = strand
61+
5762
return parsed_dict
5863

5964
@staticmethod
@@ -107,7 +112,7 @@ def is_valid_structure(synteny_structure: str) -> bool:
107112
@staticmethod
108113
def split_strand_from_locus(
109114
locus_str: str, parsed_symbol: bool = True
110-
) -> tuple[str]:
115+
) -> tuple[str, ...]:
111116
"""Split strand info from locus tag / HMM model.
112117
113118
Args:
@@ -117,7 +122,7 @@ def split_strand_from_locus(
117122
as 'pos' and '<' as 'neg'. Defaults to True.
118123
119124
Returns:
120-
tuple[str]: tuple with parsed strand info and gene symbol / HMM name.
125+
tuple[str, ...]: tuple with parsed strand info and gene symbol / HMM name.
121126
"""
122127
locus_str = locus_str.strip()
123128
if locus_str[0] == "<" or locus_str[0] == ">":

0 commit comments

Comments
 (0)