Skip to content

Commit

Permalink
fix: linting errors
Browse files Browse the repository at this point in the history
  • Loading branch information
rohan-b-84 committed Jul 8, 2024
1 parent fa4d2af commit 1754203
Show file tree
Hide file tree
Showing 8 changed files with 37 additions and 82 deletions.
6 changes: 2 additions & 4 deletions src/cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ def parse_args(
description="Kinfin proteome cluster analysis tool"
)

subparsers = parser.add_subparsers(
title="command", required=True, dest="command")
subparsers = parser.add_subparsers(title="command", required=True, dest="command")
api_parser = subparsers.add_parser("serve", help="Start the server")
api_parser.add_argument(
"-p",
Expand Down Expand Up @@ -74,8 +73,7 @@ def parse_args(
"--functional_annotation",
help="Mapping of ProteinIDs to GO/IPRS/SignalP/Pfam (can be generated through 'iprs_to_table.py')",
)
other_files_group.add_argument(
"-a", "--fasta_dir", help="Directory of FASTA files")
other_files_group.add_argument("-a", "--fasta_dir", help="Directory of FASTA files")
other_files_group.add_argument(
"-t",
"--tree_file",
Expand Down
16 changes: 4 additions & 12 deletions src/core/alo.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@ def __init__(self, attribute: str, level: str, proteomes: Set[str]) -> None:
"specific": {"true": [], "fuzzy": []},
}

self.cluster_status_by_cluster_id: Dict[
str, Literal["absent", "present"]
] = {}
self.cluster_status_by_cluster_id: Dict[str, Literal["absent", "present"]] = {}
self.cluster_type_by_cluster_id: Dict[
str, Literal["singleton", "shared", "specific"]
] = {}
Expand All @@ -61,9 +59,7 @@ def __init__(self, attribute: str, level: str, proteomes: Set[str]) -> None:
self.domain_counter_by_domain_source_by_cluster_type = None
self.protein_with_domain_count_by_domain_source_by_cluster_type = None

self.protein_length_stats_by_cluster_id: Dict[
str, Dict[str, int | float]
] = {}
self.protein_length_stats_by_cluster_id: Dict[str, Dict[str, int | float]] = {}
self.protein_count_by_cluster_id: Dict[str, int] = {}

def add_cluster(
Expand Down Expand Up @@ -134,9 +130,7 @@ def add_cluster(

self.cluster_mwu_pvalue_by_cluster_id[cluster.cluster_id] = mwu_pvalue
self.cluster_mwu_log2_mean_by_cluster_id[cluster.cluster_id] = mwu_log2_mean
self.cluster_mean_ALO_count_by_cluster_id[cluster.cluster_id] = (
mean_ALO_count
)
self.cluster_mean_ALO_count_by_cluster_id[cluster.cluster_id] = mean_ALO_count
self.cluster_mean_non_ALO_count_by_cluster_id[cluster.cluster_id] = (
mean_non_ALO_count
)
Expand Down Expand Up @@ -251,6 +245,4 @@ def get_proteomes(self) -> str:
Returns:
str: Comma-separated and sorted list of proteome IDs.
"""
return ", ".join(
sorted([str(proteome_id) for proteome_id in self.proteomes])
)
return ", ".join(sorted([str(proteome_id) for proteome_id in self.proteomes]))
37 changes: 12 additions & 25 deletions src/core/alo_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,7 @@ def compute_proteomes_by_level_by_attribute(
}
for proteome_id in self.level_by_attribute_by_proteome_id:
for attribute in self.attributes:
level = self.level_by_attribute_by_proteome_id[proteome_id][
attribute
]
level = self.level_by_attribute_by_proteome_id[proteome_id][attribute]
if level not in proteomes_by_level_by_attribute[attribute]:
proteomes_by_level_by_attribute[attribute][level] = set()
proteomes_by_level_by_attribute[attribute][level].add(proteome_id)
Expand All @@ -96,9 +94,7 @@ def create_ALOs(self) -> Dict[str, Dict[str, Optional[AttributeLevel]]]:
}
for attribute in self.proteome_ids_by_level_by_attribute:
for level in self.proteome_ids_by_level_by_attribute[attribute]:
proteome_ids = self.proteome_ids_by_level_by_attribute[attribute][
level
]
proteome_ids = self.proteome_ids_by_level_by_attribute[attribute][level]
ALO = AttributeLevel(
#
attribute=attribute,
Expand Down Expand Up @@ -312,15 +308,11 @@ def plot_tree(
node.set_style(style)
if header_f_by_node_name[node.name]:
# must be PNG! (ETE can't do PDF Faces)
node_header_face = ete3.faces.ImgFace(
header_f_by_node_name[node.name]
)
node_header_face = ete3.faces.ImgFace(header_f_by_node_name[node.name])
node.add_face(node_header_face, column=0, position="branch-top")
if charts_f_by_node_name[node.name]:
# must be PNG! (ETE can't do PDF Faces)
node_chart_face = ete3.faces.ImgFace(
charts_f_by_node_name[node.name]
)
node_chart_face = ete3.faces.ImgFace(charts_f_by_node_name[node.name])
node.add_face(node_chart_face, column=0, position="branch-bottom")
node_name_face = ete3.TextFace(node.name, fsize=64)
node.img_style["size"] = 10
Expand Down Expand Up @@ -396,10 +388,7 @@ def write_tree(
for synapomorphic_cluster_string in node.synapomorphic_cluster_strings: # type: ignore
node_clusters.append(
"\t".join(
[
str(string)
for string in list(synapomorphic_cluster_string)
]
[str(string) for string in list(synapomorphic_cluster_string)]
)
)
node_stats_line = [
Expand Down Expand Up @@ -463,13 +452,13 @@ def compute_repetition_for_rarefaction_curve(
sample_size = idx + 1
if (
sample_size
not in rarefaction_by_samplesize_by_level_by_attribute[
attribute
][level]
):
rarefaction_by_samplesize_by_level_by_attribute[attribute][
not in rarefaction_by_samplesize_by_level_by_attribute[attribute][
level
][sample_size] = []
]
):
rarefaction_by_samplesize_by_level_by_attribute[attribute][level][
sample_size
] = []
rarefaction_by_samplesize_by_level_by_attribute[attribute][level][
sample_size
].append(len(seen_cluster_ids))
Expand All @@ -496,9 +485,7 @@ def compute_rarefaction_data(
logger.info("[STATUS] - Generating rarefaction data ...")
for attribute in self.attributes:
for level in self.proteome_ids_by_level_by_attribute[attribute]:
proteome_ids = self.proteome_ids_by_level_by_attribute[attribute][
level
]
proteome_ids = self.proteome_ids_by_level_by_attribute[attribute][level]
if len(proteome_ids) == 1:
continue

Expand Down
12 changes: 3 additions & 9 deletions src/core/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,14 +137,10 @@ def parse_domains_from_functional_annotations_file(
if domain_source == "GO":
domain_id = domain_id_count
else:
domain_id, domain_count_str = domain_id_count.rsplit(
":", 2
)
domain_id, domain_count_str = domain_id_count.rsplit(":", 2)
domain_count = int(domain_count_str)
domain_counts_by_domain_id[domain_id] = domain_count
domain_counter: Counter[str] = Counter(
domain_counts_by_domain_id
)
domain_counter: Counter[str] = Counter(domain_counts_by_domain_id)
domain_counter_by_domain_source[domain_source] = domain_counter
proteinCollection.add_annotation_to_protein(
domain_protein_id=domain_protein_id,
Expand Down Expand Up @@ -290,9 +286,7 @@ def get_protein_list_from_seq_f(sequence_ids_f: str, aloCollection: AloCollectio
.replace(")", "_")
) # orthofinder replaces characters
species_id = sequence_id.split("_")[0]
if proteome_id := aloCollection.proteome_id_by_species_id.get(
species_id, None
):
if proteome_id := aloCollection.proteome_id_by_species_id.get(species_id, None):
protein = Protein(protein_id, proteome_id, species_id, sequence_id)
proteins_list.append(protein)
# else:
Expand Down
18 changes: 7 additions & 11 deletions src/core/clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ def __init__(
}
except KeyError as e:
error_msg = f"[ERROR] - Protein {e.args[0]} in clustering belongs to proteomes that are not present in the config-file."
error_msg += "Please add those proteomes or recluster by omitting these proteomes."
error_msg += (
"Please add those proteomes or recluster by omitting these proteomes."
)
raise KeyError(error_msg) from e

self.proteome_ids_list: List[str] = list(
self.proteomes_by_protein_id.values()
)
self.proteome_ids_list: List[str] = list(self.proteomes_by_protein_id.values())
self.protein_count_by_proteome_id: Counter[str] = Counter(
self.proteome_ids_list
)
Expand All @@ -45,9 +45,7 @@ def __init__(
self.protein_counts_of_proteomes_by_level_by_attribute: Dict[
str, Dict[str, List[int]]
] = {}
self.proteome_coverage_by_level_by_attribute: Dict[str, Dict[str, float]] = (
{}
)
self.proteome_coverage_by_level_by_attribute: Dict[str, Dict[str, float]] = {}
self.implicit_protein_ids_by_proteome_id_by_level_by_attribute: Dict[
str, Dict[str, Dict[str, List[str]]]
] = {}
Expand All @@ -59,10 +57,8 @@ def __init__(
self.protein_length_stats: Optional[Dict[str, float]] = (
self.compute_protein_length_stats(proteinCollection, self.protein_ids)
)
self.secreted_cluster_coverage: float = (
self.compute_secreted_cluster_coverage(
proteinCollection, self.protein_ids, self.protein_count
)
self.secreted_cluster_coverage: float = self.compute_secreted_cluster_coverage(
proteinCollection, self.protein_ids, self.protein_count
)
self.domain_counter_by_domain_source: Dict[str, Counter[str]] = (
self.compute_domain_counter_by_domain_source(
Expand Down
12 changes: 3 additions & 9 deletions src/core/logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,7 @@ def parse_attributes_from_config_file(
proteomes.add(proteome_id)
proteome_id_by_species_id[species_id] = proteome_id

level_by_attribute_by_proteome_id[proteome_id] = dict(
zip(attributes, temp)
)
level_by_attribute_by_proteome_id[proteome_id] = dict(zip(attributes, temp))
level_by_attribute_by_proteome_id[proteome_id]["all"] = "all"
attributes.insert(0, "all") # append to front
return (
Expand Down Expand Up @@ -181,9 +179,7 @@ def add_taxid_attributes(

# add lineage attribute/levels
for taxrank in taxranks:
level_by_attribute_by_proteome_id[proteome_id][taxrank] = lineage[
taxrank
]
level_by_attribute_by_proteome_id[proteome_id][taxrank] = lineage[taxrank]

# remove taxid-levels
del level_by_attribute_by_proteome_id[proteome_id]["TAXID"]
Expand Down Expand Up @@ -452,9 +448,7 @@ def parse_go_mapping(go_mapping_f: str) -> Dict[str, str]:
if not line.startswith("!"):
temp: List[str] = line.replace(" > ", "|").split("|")
go_string: List[str] = temp[1].split(";")
go_desc, go_id = go_string[0].replace("GO:", ""), go_string[1].lstrip(
" "
)
go_desc, go_id = go_string[0].replace("GO:", ""), go_string[1].lstrip(" ")

if go_id not in go_mapping_dict:
go_mapping_dict[go_id] = go_desc
Expand Down
9 changes: 2 additions & 7 deletions src/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,10 +217,7 @@ def statistic(
implicit_count_1: List[float] = [count for count in count_1 if count > 0]
implicit_count_2: List[float] = [count for count in count_2 if count > 0]

if (
len(implicit_count_1) < min_proteomes
or len(implicit_count_2) < min_proteomes
):
if len(implicit_count_1) < min_proteomes or len(implicit_count_2) < min_proteomes:
return None, None, None, None

mean_count_1 = mean(implicit_count_1)
Expand Down Expand Up @@ -255,9 +252,7 @@ def statistic(
pvalue = 1.0
elif test == "ttest":
# try:
pvalue = scipy.stats.ttest_ind(implicit_count_1, implicit_count_2)[
1
] # t-test
pvalue = scipy.stats.ttest_ind(implicit_count_1, implicit_count_2)[1] # t-test
if pvalue != pvalue: # testing for "nan"
pvalue = 1.0
elif test == "ks":
Expand Down
9 changes: 4 additions & 5 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,12 @@
logger.error("[ERROR] CLUSTER_FILE_PATH should be an absolute path.")
sys.exit(1)
if sequence_ids_f is None or not os.path.isabs(sequence_ids_f):
logger.error(
"[ERROR] SEQUENCE_IDS_FILE_PATH should be an absolute path.")
logger.error("[ERROR] SEQUENCE_IDS_FILE_PATH should be an absolute path.")
sys.exit(1)
if taxon_idx_mapping_file is None or not os.path.isabs(
taxon_idx_mapping_file):
if taxon_idx_mapping_file is None or not os.path.isabs(taxon_idx_mapping_file):
logger.error(
"[ERROR] TAXON_IDX_MAPPING_FILE_PATH should be an absolute path.")
"[ERROR] TAXON_IDX_MAPPING_FILE_PATH should be an absolute path."
)
sys.exit(1)
if results_base_dir is None or not os.path.isabs(results_base_dir):
logger.error("[ERROR] RESULTS_BASE_DIR should be an absolute path.")
Expand Down

0 comments on commit 1754203

Please sign in to comment.