fix: linting errors

genomehubs · Jul 8, 2024
commit 1754203
1 parent fa4d2af


Showing 8 changed files with 37 additions and 82 deletions.
diff --git a/src/cli/commands.py b/src/cli/commands.py
@@ -33,8 +33,7 @@ def parse_args(
         description="Kinfin proteome cluster analysis tool"
     )
 
-    subparsers = parser.add_subparsers(
-        title="command", required=True, dest="command")
+    subparsers = parser.add_subparsers(title="command", required=True, dest="command")
     api_parser = subparsers.add_parser("serve", help="Start the server")
     api_parser.add_argument(
         "-p",
@@ -74,8 +73,7 @@ def parse_args(
         "--functional_annotation",
         help="Mapping of ProteinIDs to GO/IPRS/SignalP/Pfam (can be generated through 'iprs_to_table.py')",
     )
-    other_files_group.add_argument(
-        "-a", "--fasta_dir", help="Directory of FASTA files")
+    other_files_group.add_argument("-a", "--fasta_dir", help="Directory of FASTA files")
     other_files_group.add_argument(
         "-t",
         "--tree_file",

diff --git a/src/core/alo.py b/src/core/alo.py
@@ -46,9 +46,7 @@ def __init__(self, attribute: str, level: str, proteomes: Set[str]) -> None:
             "specific": {"true": [], "fuzzy": []},
         }
 
-        self.cluster_status_by_cluster_id: Dict[
-            str, Literal["absent", "present"]
-        ] = {}
+        self.cluster_status_by_cluster_id: Dict[str, Literal["absent", "present"]] = {}
         self.cluster_type_by_cluster_id: Dict[
             str, Literal["singleton", "shared", "specific"]
         ] = {}
@@ -61,9 +59,7 @@ def __init__(self, attribute: str, level: str, proteomes: Set[str]) -> None:
         self.domain_counter_by_domain_source_by_cluster_type = None
         self.protein_with_domain_count_by_domain_source_by_cluster_type = None
 
-        self.protein_length_stats_by_cluster_id: Dict[
-            str, Dict[str, int | float]
-        ] = {}
+        self.protein_length_stats_by_cluster_id: Dict[str, Dict[str, int | float]] = {}
         self.protein_count_by_cluster_id: Dict[str, int] = {}
 
     def add_cluster(
@@ -134,9 +130,7 @@ def add_cluster(
 
         self.cluster_mwu_pvalue_by_cluster_id[cluster.cluster_id] = mwu_pvalue
         self.cluster_mwu_log2_mean_by_cluster_id[cluster.cluster_id] = mwu_log2_mean
-        self.cluster_mean_ALO_count_by_cluster_id[cluster.cluster_id] = (
-            mean_ALO_count
-        )
+        self.cluster_mean_ALO_count_by_cluster_id[cluster.cluster_id] = mean_ALO_count
         self.cluster_mean_non_ALO_count_by_cluster_id[cluster.cluster_id] = (
             mean_non_ALO_count
         )
@@ -251,6 +245,4 @@ def get_proteomes(self) -> str:
         Returns:
             str: Comma-separated and sorted list of proteome IDs.
         """
-        return ", ".join(
-            sorted([str(proteome_id) for proteome_id in self.proteomes])
-        )
+        return ", ".join(sorted([str(proteome_id) for proteome_id in self.proteomes]))
diff --git a/src/core/alo_collections.py b/src/core/alo_collections.py
@@ -72,9 +72,7 @@ def compute_proteomes_by_level_by_attribute(
         }
         for proteome_id in self.level_by_attribute_by_proteome_id:
             for attribute in self.attributes:
-                level = self.level_by_attribute_by_proteome_id[proteome_id][
-                    attribute
-                ]
+                level = self.level_by_attribute_by_proteome_id[proteome_id][attribute]
                 if level not in proteomes_by_level_by_attribute[attribute]:
                     proteomes_by_level_by_attribute[attribute][level] = set()
                 proteomes_by_level_by_attribute[attribute][level].add(proteome_id)
@@ -96,9 +94,7 @@ def create_ALOs(self) -> Dict[str, Dict[str, Optional[AttributeLevel]]]:
         }
         for attribute in self.proteome_ids_by_level_by_attribute:
             for level in self.proteome_ids_by_level_by_attribute[attribute]:
-                proteome_ids = self.proteome_ids_by_level_by_attribute[attribute][
-                    level
-                ]
+                proteome_ids = self.proteome_ids_by_level_by_attribute[attribute][level]
                 ALO = AttributeLevel(
                     #
                     attribute=attribute,
@@ -312,15 +308,11 @@ def plot_tree(
             node.set_style(style)
             if header_f_by_node_name[node.name]:
                 # must be PNG! (ETE can't do PDF Faces)
-                node_header_face = ete3.faces.ImgFace(
-                    header_f_by_node_name[node.name]
-                )
+                node_header_face = ete3.faces.ImgFace(header_f_by_node_name[node.name])
                 node.add_face(node_header_face, column=0, position="branch-top")
             if charts_f_by_node_name[node.name]:
                 # must be PNG! (ETE can't do PDF Faces)
-                node_chart_face = ete3.faces.ImgFace(
-                    charts_f_by_node_name[node.name]
-                )
+                node_chart_face = ete3.faces.ImgFace(charts_f_by_node_name[node.name])
                 node.add_face(node_chart_face, column=0, position="branch-bottom")
             node_name_face = ete3.TextFace(node.name, fsize=64)
             node.img_style["size"] = 10
@@ -396,10 +388,7 @@ def write_tree(
             for synapomorphic_cluster_string in node.synapomorphic_cluster_strings:  # type: ignore
                 node_clusters.append(
                     "\t".join(
-                        [
-                            str(string)
-                            for string in list(synapomorphic_cluster_string)
-                        ]
+                        [str(string) for string in list(synapomorphic_cluster_string)]
                     )
                 )
             node_stats_line = [
@@ -463,13 +452,13 @@ def compute_repetition_for_rarefaction_curve(
                 sample_size = idx + 1
                 if (
                     sample_size
-                    not in rarefaction_by_samplesize_by_level_by_attribute[
-                        attribute
-                    ][level]
-                ):
-                    rarefaction_by_samplesize_by_level_by_attribute[attribute][
+                    not in rarefaction_by_samplesize_by_level_by_attribute[attribute][
                         level
-                    ][sample_size] = []
+                    ]
+                ):
+                    rarefaction_by_samplesize_by_level_by_attribute[attribute][level][
+                        sample_size
+                    ] = []
                 rarefaction_by_samplesize_by_level_by_attribute[attribute][level][
                     sample_size
                 ].append(len(seen_cluster_ids))
@@ -496,9 +485,7 @@ def compute_rarefaction_data(
         logger.info("[STATUS] - Generating rarefaction data ...")
         for attribute in self.attributes:
             for level in self.proteome_ids_by_level_by_attribute[attribute]:
-                proteome_ids = self.proteome_ids_by_level_by_attribute[attribute][
-                    level
-                ]
+                proteome_ids = self.proteome_ids_by_level_by_attribute[attribute][level]
                 if len(proteome_ids) == 1:
                     continue
 

diff --git a/src/core/build.py b/src/core/build.py
@@ -137,14 +137,10 @@ def parse_domains_from_functional_annotations_file(
                         if domain_source == "GO":
                             domain_id = domain_id_count
                         else:
-                            domain_id, domain_count_str = domain_id_count.rsplit(
-                                ":", 2
-                            )
+                            domain_id, domain_count_str = domain_id_count.rsplit(":", 2)
                             domain_count = int(domain_count_str)
                         domain_counts_by_domain_id[domain_id] = domain_count
-                    domain_counter: Counter[str] = Counter(
-                        domain_counts_by_domain_id
-                    )
+                    domain_counter: Counter[str] = Counter(domain_counts_by_domain_id)
                     domain_counter_by_domain_source[domain_source] = domain_counter
             proteinCollection.add_annotation_to_protein(
                 domain_protein_id=domain_protein_id,
@@ -290,9 +286,7 @@ def get_protein_list_from_seq_f(sequence_ids_f: str, aloCollection: AloCollectio
             .replace(")", "_")
         )  # orthofinder replaces characters
         species_id = sequence_id.split("_")[0]
-        if proteome_id := aloCollection.proteome_id_by_species_id.get(
-            species_id, None
-        ):
+        if proteome_id := aloCollection.proteome_id_by_species_id.get(species_id, None):
             protein = Protein(protein_id, proteome_id, species_id, sequence_id)
             proteins_list.append(protein)
         # else:

diff --git a/src/core/clusters.py b/src/core/clusters.py
@@ -25,12 +25,12 @@ def __init__(
             }
         except KeyError as e:
             error_msg = f"[ERROR] - Protein {e.args[0]} in clustering belongs to proteomes that are not present in the config-file."
-            error_msg += "Please add those proteomes or recluster by omitting these proteomes."
+            error_msg += (
+                "Please add those proteomes or recluster by omitting these proteomes."
+            )
             raise KeyError(error_msg) from e
 
-        self.proteome_ids_list: List[str] = list(
-            self.proteomes_by_protein_id.values()
-        )
+        self.proteome_ids_list: List[str] = list(self.proteomes_by_protein_id.values())
         self.protein_count_by_proteome_id: Counter[str] = Counter(
             self.proteome_ids_list
         )
@@ -45,9 +45,7 @@ def __init__(
         self.protein_counts_of_proteomes_by_level_by_attribute: Dict[
             str, Dict[str, List[int]]
         ] = {}
-        self.proteome_coverage_by_level_by_attribute: Dict[str, Dict[str, float]] = (
-            {}
-        )
+        self.proteome_coverage_by_level_by_attribute: Dict[str, Dict[str, float]] = {}
         self.implicit_protein_ids_by_proteome_id_by_level_by_attribute: Dict[
             str, Dict[str, Dict[str, List[str]]]
         ] = {}
@@ -59,10 +57,8 @@ def __init__(
         self.protein_length_stats: Optional[Dict[str, float]] = (
             self.compute_protein_length_stats(proteinCollection, self.protein_ids)
         )
-        self.secreted_cluster_coverage: float = (
-            self.compute_secreted_cluster_coverage(
-                proteinCollection, self.protein_ids, self.protein_count
-            )
+        self.secreted_cluster_coverage: float = self.compute_secreted_cluster_coverage(
+            proteinCollection, self.protein_ids, self.protein_count
         )
         self.domain_counter_by_domain_source: Dict[str, Counter[str]] = (
             self.compute_domain_counter_by_domain_source(

diff --git a/src/core/logic.py b/src/core/logic.py
@@ -136,9 +136,7 @@ def parse_attributes_from_config_file(
             proteomes.add(proteome_id)
             proteome_id_by_species_id[species_id] = proteome_id
 
-            level_by_attribute_by_proteome_id[proteome_id] = dict(
-                zip(attributes, temp)
-            )
+            level_by_attribute_by_proteome_id[proteome_id] = dict(zip(attributes, temp))
             level_by_attribute_by_proteome_id[proteome_id]["all"] = "all"
     attributes.insert(0, "all")  # append to front
     return (
@@ -181,9 +179,7 @@ def add_taxid_attributes(
 
         # add lineage attribute/levels
         for taxrank in taxranks:
-            level_by_attribute_by_proteome_id[proteome_id][taxrank] = lineage[
-                taxrank
-            ]
+            level_by_attribute_by_proteome_id[proteome_id][taxrank] = lineage[taxrank]
 
         # remove taxid-levels
         del level_by_attribute_by_proteome_id[proteome_id]["TAXID"]
@@ -452,9 +448,7 @@ def parse_go_mapping(go_mapping_f: str) -> Dict[str, str]:
         if not line.startswith("!"):
             temp: List[str] = line.replace(" > ", "|").split("|")
             go_string: List[str] = temp[1].split(";")
-            go_desc, go_id = go_string[0].replace("GO:", ""), go_string[1].lstrip(
-                " "
-            )
+            go_desc, go_id = go_string[0].replace("GO:", ""), go_string[1].lstrip(" ")
 
             if go_id not in go_mapping_dict:
                 go_mapping_dict[go_id] = go_desc

diff --git a/src/core/utils.py b/src/core/utils.py
@@ -217,10 +217,7 @@ def statistic(
     implicit_count_1: List[float] = [count for count in count_1 if count > 0]
     implicit_count_2: List[float] = [count for count in count_2 if count > 0]
 
-    if (
-        len(implicit_count_1) < min_proteomes
-        or len(implicit_count_2) < min_proteomes
-    ):
+    if len(implicit_count_1) < min_proteomes or len(implicit_count_2) < min_proteomes:
         return None, None, None, None
 
     mean_count_1 = mean(implicit_count_1)
@@ -255,9 +252,7 @@ def statistic(
             pvalue = 1.0
     elif test == "ttest":
         # try:
-        pvalue = scipy.stats.ttest_ind(implicit_count_1, implicit_count_2)[
-            1
-        ]  # t-test
+        pvalue = scipy.stats.ttest_ind(implicit_count_1, implicit_count_2)[1]  # t-test
         if pvalue != pvalue:  # testing for "nan"
             pvalue = 1.0
     elif test == "ks":

diff --git a/src/main.py b/src/main.py
@@ -41,13 +41,12 @@
             logger.error("[ERROR] CLUSTER_FILE_PATH should be an absolute path.")
             sys.exit(1)
         if sequence_ids_f is None or not os.path.isabs(sequence_ids_f):
-            logger.error(
-                "[ERROR] SEQUENCE_IDS_FILE_PATH should be an absolute path.")
+            logger.error("[ERROR] SEQUENCE_IDS_FILE_PATH should be an absolute path.")
             sys.exit(1)
-        if taxon_idx_mapping_file is None or not os.path.isabs(
-                taxon_idx_mapping_file):
+        if taxon_idx_mapping_file is None or not os.path.isabs(taxon_idx_mapping_file):
             logger.error(
-                "[ERROR] TAXON_IDX_MAPPING_FILE_PATH should be an absolute path.")
+                "[ERROR] TAXON_IDX_MAPPING_FILE_PATH should be an absolute path."
+            )
             sys.exit(1)
         if results_base_dir is None or not os.path.isabs(results_base_dir):
             logger.error("[ERROR] RESULTS_BASE_DIR should be an absolute path.")