diff --git a/src/api/endpoints.py b/src/api/endpoints.py
index f7a853c..04f4bbc 100644
--- a/src/api/endpoints.py
+++ b/src/api/endpoints.py
@@ -1,3 +1,5 @@
+import asyncio
+import json
 import os
 from typing import Dict, List
@@ -7,8 +9,6 @@ from pydantic import BaseModel
 
 from api.sessions import session_manager
-from core.input import InputData
-from core.results import analyse


 class InputSchema(BaseModel):
@@ -21,11 +21,26 @@ class InputSchema(BaseModel):
 router = APIRouter()


+async def run_cli_command(command: list):
+    process = await asyncio.create_subprocess_exec(
+        *command, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+    )
+
+    stdout, stderr = await process.communicate()
+
+    stdout = stdout.decode().strip()
+    stderr = stderr.decode().strip()
+
+    if process.returncode != 0:
+        raise RuntimeError(
+            f"CLI command failed with return code {process.returncode}: {stderr}"
+        )
+
+    return stdout
+
+
 @router.post("/init")
-async def initialize(
-    input_data: InputSchema,
-    background_tasks: BackgroundTasks,
-) -> JSONResponse:
+async def initialize(input_data: InputSchema) -> JSONResponse:
     """
     Initialize the analysis process.
@@ -53,20 +68,29 @@ async def initialize(
         )

     session_id, result_dir = session_manager.new()
-    data = InputData(
-        nodesdb_f=session_manager.nodesdb_f,
-        go_mapping_f=session_manager.go_mapping_f,
-        pfam_mapping_f=session_manager.pfam_mapping_f,
-        sequence_ids_file=session_manager.sequence_ids_f,
-        ipr_mapping_f=session_manager.ipr_mapping_f,
-        cluster_file=session_manager.cluster_f,
-        config_data=input_data.config,
-        taxon_idx_mapping_file=session_manager.taxon_idx_mapping_file,
-        output_path=result_dir,
-        plot_format="png",  # as we require images
-    )
-
-    background_tasks.add_task(analyse, data)
+    os.makedirs(result_dir, exist_ok=True)
+    config_f = os.path.join(result_dir, "config.json")
+
+    with open(config_f, "w") as file:
+        json.dump(input_data.config, file)
+
+    command = [
+        "python",
+        "src/main.py",
+        "analyse",
+        "-g",
+        session_manager.cluster_f,
+        "-c",
+        config_f,
+        "-s",
+        session_manager.sequence_ids_f,
+        "-m",
+        session_manager.taxon_idx_mapping_file,
+        "-o",
+        result_dir,
+    ]
+
+    asyncio.create_task(run_cli_command(command))

     return JSONResponse(
         content={"detail": "Analysis task has been queued."},
diff --git a/src/cli/commands.py b/src/cli/commands.py
index ac55f2e..6a8d176 100644
--- a/src/cli/commands.py
+++ b/src/cli/commands.py
@@ -1,5 +1,6 @@
 import argparse
 import sys
+from typing import Union

 from cli.validate import validate_cli_args
 from core.config import SUPPORTED_PLOT_FORMATS, SUPPORTED_TAXRANKS, SUPPORTED_TESTS
@@ -13,7 +14,7 @@ def parse_args(
     pfam_mapping_f: str,
     ipr_mapping_f: str,
     go_mapping_f: str,
-) -> ServeArgs | InputData:
+) -> Union[ServeArgs, InputData]:
     """Parse command-line arguments.

     Args:
@@ -68,6 +69,9 @@ def parse_args(
     other_files_group.add_argument(
         "-p", "--species_ids_file", help="SpeciesIDs.txt used in OrthoFinder"
     )
+    other_files_group.add_argument(
+        "-m", "--taxon_idx_mapping", help="TAXON IDX Mapping File"
+    )
     other_files_group.add_argument(
         "-f",
         "--functional_annotation",
@@ -179,7 +183,7 @@ def parse_args(
         return InputData(
             cluster_file=args.cluster_file,
-            config_data=args.config_file,
+            config_f=args.config_file,
             sequence_ids_file=args.sequence_ids_file,
             species_ids_file=args.species_ids_file,
             functional_annotation_f=args.functional_annotation,
@@ -202,6 +206,7 @@ def parse_args(
             pfam_mapping_f=pfam_mapping_f,
             ipr_mapping_f=ipr_mapping_f,
             go_mapping_f=go_mapping_f,
+            taxon_idx_mapping_file=args.taxon_idx_mapping,
         )
     else:
         sys.exit()
diff --git a/src/core/alo.py b/src/core/alo.py
index e61846a..378c83c 100644
--- a/src/core/alo.py
+++ b/src/core/alo.py
@@ -1,4 +1,4 @@
-from typing import Dict, List, Literal, Optional, Set
+from typing import Dict, List, Literal, Optional, Set, Union

 from core.clusters import Cluster
@@ -33,7 +33,7 @@ def __init__(self, attribute: str, level: str, proteomes: Set[str]) -> None:
             "shared": [],
         }
-        self.protein_span_by_cluster_type: Dict[str, List[int | float]] = {
+        self.protein_span_by_cluster_type: Dict[str, List[Union[int, float]]] = {
             "singleton": [],
             "specific": [],
             "shared": [],
         }
@@ -59,7 +59,9 @@ def __init__(self, attribute: str, level: str, proteomes: Set[str]) -> None:
         self.domain_counter_by_domain_source_by_cluster_type = None
         self.protein_with_domain_count_by_domain_source_by_cluster_type = None

-        self.protein_length_stats_by_cluster_id: Dict[str, Dict[str, int | float]] = {}
+        self.protein_length_stats_by_cluster_id: Dict[
+            str, Dict[str, Union[int, float]]
+        ] = {}
         self.protein_count_by_cluster_id: Dict[str, int] = {}

     def add_cluster(
@@ -67,7 +69,7 @@ def add_cluster(
         cluster: Cluster,
         attribute_cluster_type: Literal["singleton", "shared", "specific"],
         ALO_cluster_status: Literal["absent", "present"],
-        ALO_protein_length_stats: Dict[str, int | float],
+        ALO_protein_length_stats: Dict[str, Union[int, float]],
         ALO_protein_ids_in_cluster: List[str],
         ALO_cluster_cardinality: Optional[str],
         mwu_pvalue: Optional[float],
@@ -84,7 +86,7 @@ def add_cluster(
                 Type of the cluster as either 'singleton', 'shared', or 'specific'.
             ALO_cluster_status (Literal["absent", "present"]):
                 Status of the cluster, either 'absent' or 'present'.
-            ALO_protein_length_stats (Dict[str, int | float]):
+            ALO_protein_length_stats (Dict[str, Union[int, float]]):
                 Length statistics of proteins in the cluster.
             ALO_protein_ids_in_cluster (List[str]):
                 List of protein IDs present in the cluster.
@@ -192,7 +194,7 @@ def get_cluster_count_by_cluster_status_by_cluster_type(
             ]
         )

-    def get_protein_span_by_cluster_type(self, cluster_type: str) -> int | float:
+    def get_protein_span_by_cluster_type(self, cluster_type: str) -> Union[int, float]:
         """
         Get the total span of proteins for a specific cluster type.
@@ -201,7 +203,7 @@ def get_protein_span_by_cluster_type(self, cluster_type: str) -> int | float:
                 Use "total" to get the total span across all cluster types.

         Returns:
-            int | float: Total span of proteins in the specified cluster type.
+            Union[int, float]: Total span of proteins in the specified cluster type.
                 If 'cluster_type' is "total", returns the sum of spans across all cluster types.
""" diff --git a/src/core/alo_collections.py b/src/core/alo_collections.py index 05b7022..b7712fb 100644 --- a/src/core/alo_collections.py +++ b/src/core/alo_collections.py @@ -186,7 +186,7 @@ def generate_header_for_node(self, node: ete3.TreeNode, dirs: Dict[str, str]): table.scale(2, 1) for key, cell in list(table.get_celld().items()): row, col = key - cell._text.set_color("grey") + cell._text.set_color("grey") # type:ignore cell.set_edgecolor("darkgrey") cell.visible_edges = "T" if row > 0 else "B" if row == len(data) - 2: diff --git a/src/core/build.py b/src/core/build.py index 7a56fe0..d35530c 100644 --- a/src/core/build.py +++ b/src/core/build.py @@ -1,5 +1,7 @@ -from collections import Counter -from typing import Any, Dict, List, Optional +from collections import Counter, OrderedDict, defaultdict +import json +import os +from typing import Any, Dict, List, Optional, Set, Union from ete3 import Tree @@ -7,8 +9,7 @@ from core.clusters import Cluster, ClusterCollection from core.logic import ( add_taxid_attributes, - parse_attributes_from_config_file, - parse_attributes_from_json, + parse_attributes_from_config_data, parse_fasta_dir, parse_go_mapping, parse_ipr_mapping, @@ -56,33 +57,107 @@ def get_singletons( def parse_cluster_file( + output_dir: str, cluster_f: str, proteinCollection: ProteinCollection, + available_proteomes: Set[str], ) -> List[Cluster]: """ Parses a cluster file to create Cluster objects and updates protein information. + Saves the filtered clustering data and stats to files. Args: + output_dir (str): Base directory path for saving files. cluster_f (str): Path to the cluster file. proteinCollection (ProteinCollection): Collection of Protein objects. + available_proteomes (Set[str]): Set of all available proteomes. Returns: - List[Cluster]: List of Cluster objects created from the file. + Tuple[List[Cluster], Dict[str, any]]: List of Cluster objects and stats. Raises: FileNotFoundError: If the cluster file `cluster_f` does not exist. 
""" cluster_list: List[Cluster] = [] - with open(cluster_f) as fh: + stats = { + "total_clusters": 0, + "total_proteins": 0, + "total_proteomes": len(available_proteomes), + "filtered_clusters": 0, + "filtered_proteins": 0, + "included_proteins": [], + "excluded_proteins": [], + "included_proteomes": defaultdict(int), + "excluded_proteomes": defaultdict(int), + } + + output_filtered_file = os.path.join(output_dir, "filtered_orthogroups.txt") + stats_file = os.path.join(output_dir, "filtering_summary.json") + + with open(cluster_f) as fh, open(output_filtered_file, "w") as ofh: for line in fh: + stats["total_clusters"] += 1 temp: List[str] = line.rstrip("\n").split(" ") cluster_id, protein_ids = temp[0].replace(":", ""), temp[1:] protein_ids = [protein_id for protein_id in protein_ids if protein_id] - cluster = Cluster(cluster_id, protein_ids, proteinCollection) + + filtered_protein_ids = [] for protein_id in protein_ids: - protein = proteinCollection.proteins_by_protein_id[protein_id] - protein.clustered = True - cluster_list.append(cluster) + proteome_id = protein_id.split(".")[0] # Extract proteome ID + if proteome_id in available_proteomes: + filtered_protein_ids.append(protein_id) + stats["included_proteins"].append(protein_id) + stats["included_proteomes"][proteome_id] += 1 + else: + stats["excluded_proteins"].append(protein_id) + stats["excluded_proteomes"][proteome_id] += 1 + + stats["total_proteins"] += len(protein_ids) + stats["filtered_proteins"] += len(filtered_protein_ids) + + if filtered_protein_ids: + # Only create a cluster if there are proteins left after filtering + cluster = Cluster(cluster_id, filtered_protein_ids, proteinCollection) + for protein_id in filtered_protein_ids: + protein = proteinCollection.proteins_by_protein_id[protein_id] + protein.clustered = True + cluster_list.append(cluster) + + ofh.write(f"{cluster_id}: {', '.join(filtered_protein_ids)}\n") + stats["filtered_clusters"] += 1 + + stats["included_proteins_count"] = len(set(stats["included_proteins"])) + stats["excluded_proteins_count"] = len(set(stats["excluded_proteins"])) + + # Convert proteome counts to lists of counts for JSON serialization + stats["included_proteomes"] = dict(stats["included_proteomes"]) + stats["excluded_proteomes"] = dict(stats["excluded_proteomes"]) + + # Reorder stats + ordered_stats = OrderedDict( + [ + ("total_clusters", stats["total_clusters"]), + ("total_proteins", stats["total_proteins"]), + ("total_proteomes", stats["total_proteomes"]), + ("filtered_clusters", stats["filtered_clusters"]), + ("filtered_proteins", stats["filtered_proteins"]), + ("included_proteins_count", stats["included_proteins_count"]), + ("excluded_proteins_count", stats["excluded_proteins_count"]), + ("included_proteomes", stats["included_proteomes"]), + ("excluded_proteomes", stats["excluded_proteomes"]), + ("included_proteins", stats["included_proteins"]), + ("excluded_proteins", stats["excluded_proteins"]), + ] + ) + + with open(stats_file, "w") as mf: + json.dump( + ordered_stats, + mf, + separators=(", ", ": "), + indent=4, + ) + return cluster_list @@ -155,12 +230,13 @@ def parse_domains_from_functional_annotations_file( proteinCollection.functional_annotation_parsed = True -# cli +# common def build_AloCollection( config_f: str, nodesdb_f: str, taxranks: List[str], tree_f: Optional[str], + taxon_idx_mapping_file: Optional[str], ) -> AloCollection: """ Builds an AloCollection object from command-line interface (CLI) inputs. 
@@ -179,8 +255,7 @@ def build_AloCollection(
         proteome_id_by_species_id,
         attributes,
         level_by_attribute_by_proteome_id,
-    ) = parse_attributes_from_config_file(config_f)
-
+    ) = parse_attributes_from_config_data(config_f, taxon_idx_mapping_file)
     # Add taxonomy if needed
     if "TAXID" in set(attributes):
         logger.info(
@@ -212,71 +287,9 @@ def build_AloCollection(
     )


-# api
-def build_AloCollection_from_json(
-    nodesdb_f: str,
-    taxranks: List[str],
-    json_list: List[Dict[str, str]],
-    taxon_idx_mapping_file: str,
-    tree_f: Optional[str],
-):
-    """
-    Builds an AloCollection object from API input.
-
-    Args:
-        json_list List[Dict[str,str]]: JSON list of attributes.
-        taxon_idx_mapping_file str: The path to the taxon-idx mapping file
-        nodesdb_f (str): Path to the nodes database file for inferring taxonomic ranks.
-        taxranks (List[str]): List of taxonomic ranks to be inferred.
-        tree_f (Optional[str]): Path to the tree file. If provided, ALOs are added from the tree.
-
-    Returns:
-        AloCollection: An instance of the AloCollection class containing parsed data.
-    """
-    (
-        proteomes,
-        proteome_id_by_species_id,
-        attributes,
-        level_by_attribute_by_proteome_id,
-    ) = parse_attributes_from_json(
-        json_list=json_list,
-        taxon_idx_mapping_file=taxon_idx_mapping_file,
-    )
-
-    # Add taxonomy if needed
-    if "TAXID" in set(attributes):
-        logger.info(
-            "[STATUS] - Attribute 'TAXID' found, inferring taxonomic ranks from nodesDB"
-        )
-        attributes, level_by_attribute_by_proteome_id = add_taxid_attributes(
-            attributes=attributes,
-            level_by_attribute_by_proteome_id=level_by_attribute_by_proteome_id,
-            nodesdb_f=nodesdb_f,
-            taxranks=taxranks,
-        )
-    # Add ALOs from tree if provided
-    tree_ete: Optional[Tree] = None
-    node_idx_by_proteome_ids: Optional[Dict[Any, Any]] = None
-    tree_ete, node_idx_by_proteome_ids = parse_tree_from_file(
-        tree_f,
-        attributes,
-        level_by_attribute_by_proteome_id,
-        proteomes,
-    )
-
-    logger.info("[STATUS] - Building AloCollection ...")
-    return AloCollection(
-        proteomes=proteomes,
-        attributes=attributes,
-        proteome_id_by_species_id=proteome_id_by_species_id,
-        level_by_attribute_by_proteome_id=level_by_attribute_by_proteome_id,
-        node_idx_by_proteome_ids=node_idx_by_proteome_ids,
-        tree_ete=tree_ete,
-    )
-
-
 def get_protein_list_from_seq_f(sequence_ids_f: str, aloCollection: AloCollection):
     logger.info(f"[STATUS] - Parsing sequence IDs: {sequence_ids_f} ...")
+    proteins_list: List[Protein] = []
     for line in yield_file_lines(sequence_ids_f):
         temp = line.split(": ")
@@ -293,9 +306,6 @@ def get_protein_list_from_seq_f(sequence_ids_f: str, aloCollectio
         if proteome_id := aloCollection.proteome_id_by_species_id.get(species_id, None):
             protein = Protein(protein_id, proteome_id, species_id, sequence_id)
             proteins_list.append(protein)
-        # else:
-        #     error_msg = f"[ERROR] - Offending SequenceID : {line} (unknown species_id {species_id})"
-        #     raise ValueError(error_msg)
     return proteins_list
@@ -359,12 +369,19 @@ def build_ProteinCollection(


 def build_ClusterCollection(
+    output_dir: str,
     cluster_f: str,
     proteinCollection: ProteinCollection,
     infer_singletons: Optional[bool],
+    available_proteomes: Set[str],
 ) -> ClusterCollection:
     logger.info(f"[STATUS] - Parsing {cluster_f} ... this may take a while")
-    cluster_list: List[Cluster] = parse_cluster_file(cluster_f, proteinCollection)
+    cluster_list: List[Cluster] = parse_cluster_file(
+        output_dir,
+        cluster_f,
+        proteinCollection,
+        available_proteomes,
+    )

     inferred_singletons_count = 0
     if infer_singletons:
diff --git a/src/core/clusters.py b/src/core/clusters.py
index eaac9d4..79cff89 100644
--- a/src/core/clusters.py
+++ b/src/core/clusters.py
@@ -86,7 +86,7 @@ def compute_protein_length_stats(
        (standard deviation) of protein lengths, if all lengths are available and
        at least one protein ID is provided. Returns None if no valid protein lengths are found.
    """
-    protein_lengths: List[int | None] = [
+    protein_lengths: List[Optional[int]] = [
        proteinCollection.proteins_by_protein_id[protein_id].length
        for protein_id in protein_ids
    ]
diff --git a/src/core/datastore.py b/src/core/datastore.py
index 6006e09..abb4c10 100644
--- a/src/core/datastore.py
+++ b/src/core/datastore.py
@@ -2,7 +2,7 @@
 import shutil
 import time
 from collections import Counter, defaultdict
-from typing import Any, Dict, FrozenSet, Generator, List, Optional, Set, Tuple
+from typing import Any, Dict, FrozenSet, Generator, List, Optional, Set, Tuple, Union

 import matplotlib as mat
 import matplotlib.pyplot as plt
@@ -14,7 +14,6 @@
 from core.alo_collections import AloCollection
 from core.build import (
     build_AloCollection,
-    build_AloCollection_from_json,
     build_ClusterCollection,
     build_ProteinCollection,
 )
@@ -39,24 +38,13 @@ class DataFactory:
     def __init__(self, inputData: InputData) -> None:
         self.dirs = {}
         self.inputData: InputData = inputData
-        if isinstance(self.inputData.config_data, str):
-            self.aloCollection: AloCollection = build_AloCollection(
-                config_f=self.inputData.config_data,
-                nodesdb_f=self.inputData.nodesdb_f,
-                tree_f=self.inputData.tree_f,
-                taxranks=self.inputData.taxranks,
-            )
-        elif self.inputData.taxon_idx_mapping_file is not None:
-            self.aloCollection: AloCollection = build_AloCollection_from_json(
-                nodesdb_f=self.inputData.nodesdb_f,
-                tree_f=self.inputData.tree_f,
-                taxranks=self.inputData.taxranks,
-                json_list=self.inputData.config_data,
-                taxon_idx_mapping_file=self.inputData.taxon_idx_mapping_file,
-            )
-        else:
-            raise ValueError("[ERROR] - Either provide config file or json")
-
+        self.aloCollection: AloCollection = build_AloCollection(
+            config_f=self.inputData.config_f,
+            nodesdb_f=self.inputData.nodesdb_f,
+            tree_f=self.inputData.tree_f,
+            taxranks=self.inputData.taxranks,
+            taxon_idx_mapping_file=self.inputData.taxon_idx_mapping_file,
+        )
         self.proteinCollection: ProteinCollection = build_ProteinCollection(
             aloCollection=self.aloCollection,
             fasta_dir=self.inputData.fasta_dir,
@@ -71,37 +59,39 @@ def __init__(self, inputData: InputData) -> None:
         )
         self.clusterCollection: ClusterCollection = build_ClusterCollection(
             cluster_f=self.inputData.cluster_f,
+            output_dir=self.inputData.output_path,
             proteinCollection=self.proteinCollection,
             infer_singletons=self.inputData.infer_singletons,
+            available_proteomes=self.aloCollection.proteomes,
         )

     def setup_dirs(self) -> None:
         """
         Set up output directories for storing results and attributes.
""" - output_path: Optional[str] = self.inputData.output_path - - if output_path: - if not os.path.isabs(output_path): - output_path = os.path.abspath(output_path) - else: - output_path = os.path.join(os.getcwd(), "kinfin_results") + output_path: str = self.inputData.output_path self.dirs["main"] = output_path logger.info("[STATUS] - Output directories in") logger.info(f"\t{output_path}") - if os.path.exists(output_path): - logger.info("[STATUS] - Directory exists. Deleting directory ...") - shutil.rmtree(output_path) + log_file_path = ( + os.path.join(output_path, "kinfin.log") + if os.path.exists(output_path) + else None + ) + if not os.path.exists(output_path): + logger.info("[STATUS] - Creating main output directory...") + os.makedirs(output_path) logger.info("[STATUS] - Creating directories ...") - os.mkdir(output_path) for attribute in self.aloCollection.attributes: attribute_path = os.path.join(output_path, attribute) self.dirs[attribute] = attribute_path if not os.path.exists(attribute_path): - logger.info(f"\t{attribute_path}") - os.mkdir(attribute_path) + logger.info( + f"[STATUS] - Creating directory for attribute: {attribute_path}" + ) + os.makedirs(attribute_path) if self.aloCollection.tree_ete is not None: tree_path = os.path.join(output_path, "tree") @@ -109,17 +99,23 @@ def setup_dirs(self) -> None: node_header_path = os.path.join(tree_path, "headers") if not os.path.exists(tree_path): - logger.info(f"\t{tree_path}") - os.mkdir(tree_path) + logger.info(f"[STATUS] - Creating tree directory: {tree_path}") + os.makedirs(tree_path) self.dirs["tree"] = tree_path - logger.info(f"\t{node_chart_path}") - os.mkdir(node_chart_path) + if not os.path.exists(node_chart_path): + logger.info( + f"[STATUS] - Creating node charts directory: {node_chart_path}" + ) + os.makedirs(node_chart_path) self.dirs["tree_charts"] = node_chart_path - if self.inputData.plot_tree: - logger.info(f"\t{node_header_path}") - os.mkdir(node_header_path) + if self.inputData.plot_tree: + if not os.path.exists(node_header_path): + logger.info( + f"[STATUS] - Creating node headers directory: {node_header_path}" + ) + os.makedirs(node_header_path) self.dirs["tree_headers"] = node_header_path def analyse_clusters(self) -> None: @@ -213,10 +209,18 @@ def plot_rarefaction_data( y_mins_array = np.array(y_mins) y_maxs_array = np.array(y_maxs) ax.plot( - median_x_values, median_y_values, "-", color=colour, label=level + median_x_values, + median_y_values, + "-", + color=colour, + label=level, ) ax.fill_between( - x_array, y_mins_array, y_maxs_array, color=colour, alpha=0.5 + x_array, + y_mins_array, # type:ignore + y_maxs_array, # type:ignore + color=colour, + alpha=0.5, ) ax.set_xlim([0, max_number_of_samples + 1]) ax.set_ylabel("Count of non-singleton clusters", fontsize=fontsize) @@ -387,7 +391,7 @@ def __process_level( attribute: str, level: str, protein_ids_by_level: Dict[str, List[str]], - protein_length_stats_by_level: Dict[str, Dict[str, int | float]], + protein_length_stats_by_level: Dict[str, Dict[str, Union[int, float]]], explicit_protein_count_by_proteome_id_by_level: Dict[str, Dict[str, int]], ) -> None: """ @@ -447,7 +451,7 @@ def __update_ALO_data( cluster: Cluster, attribute: str, protein_ids_by_level: Dict[str, List[str]], - protein_length_stats_by_level: Dict[str, Dict[str, int | float]], + protein_length_stats_by_level: Dict[str, Dict[str, Union[int, float]]], explicit_protein_count_by_proteome_id_by_level: Dict[str, Dict[str, int]], ) -> None: """ @@ -558,7 +562,7 @@ def 
__process_single_attribute(self, cluster: Cluster, attribute: str) -> None: None """ protein_ids_by_level: Dict[str, List[str]] = {} - protein_length_stats_by_level: Dict[str, Dict[str, int | float]] = {} + protein_length_stats_by_level: Dict[str, Dict[str, Union[int, float]]] = {} explicit_protein_count_by_proteome_id_by_level: Dict[str, Dict[str, int]] = {} cluster.protein_counts_of_proteomes_by_level_by_attribute[attribute] = {} @@ -849,7 +853,7 @@ def __plot_cluster_sizes(self) -> None: y_values.append(count) x_array = np.array(x_values) # type: ignore y_array = np.array(y_values) - ax.scatter(x_array, y_array, marker="o", alpha=0.8, s=100) + ax.scatter(x_array, y_array, marker="o", alpha=0.8, s=100) # type: ignore ax.set_xlabel("Cluster size", fontsize=self.inputData.fontsize) ax.set_ylabel("Count", fontsize=self.inputData.fontsize) ax.set_yscale("log") @@ -1888,7 +1892,7 @@ def __plot_data( """ # Plot histogram binwidth = 0.05 - xymax = np.max(np.fabs(log2fc_array)) + xymax = np.max(np.fabs(log2fc_array)) # type: ignore lim = (int(xymax / binwidth) + 1) * binwidth bins = np.arange(-lim, lim + binwidth, binwidth) axHistx.hist( diff --git a/src/core/input.py b/src/core/input.py index 38b9fd9..3562942 100644 --- a/src/core/input.py +++ b/src/core/input.py @@ -1,4 +1,5 @@ -from typing import Dict, List, Optional, Set, Tuple +import os +from typing import Dict, List, Optional, Set, Tuple, Union class ServeArgs: @@ -14,7 +15,7 @@ def __init__( ipr_mapping_f: str, go_mapping_f: str, cluster_file: str, - config_data: List[Dict[str, str]] | str, + config_f: str, sequence_ids_file: str, species_ids_file: Optional[str] = None, functional_annotation_f: Optional[str] = None, @@ -25,7 +26,7 @@ def __init__( plot_tree: bool = False, min_proteomes: int = 2, test: str = "mannwhitneyu", - taxranks: List[str] = None, + taxranks: List[str] = ["phylum", "order", "genus"], repetitions: int = 30, fuzzy_count: int = 1, fuzzy_fraction: float = 0.75, @@ -35,16 +36,22 @@ def __init__( plot_format: str = "pdf", taxon_idx_mapping_file: Optional[str] = None, ): - if taxranks is None: - taxranks = ["phylum", "order", "genus"] + if output_path: + if not os.path.isabs(output_path): + output_path = os.path.abspath(output_path) + else: + output_path = os.path.join(os.getcwd(), "kinfin_results") + self.cluster_f = cluster_file - self.config_data = config_data + self.config_f = config_f self.sequence_ids_f = sequence_ids_file self.species_ids_f = species_ids_file self.tree_f = tree_file self.functional_annotation_f = functional_annotation_f + if config_f.endswith(".json"): + if not taxon_idx_mapping_file: + raise ValueError("[ERROR] - taxon_idx_mapping not present") self.taxon_idx_mapping_file = taxon_idx_mapping_file - self.nodesdb_f = nodesdb_f self.pfam_mapping_f = pfam_mapping_f self.ipr_mapping_f = ipr_mapping_f diff --git a/src/core/logic.py b/src/core/logic.py index f9ce5bd..6b09bfa 100644 --- a/src/core/logic.py +++ b/src/core/logic.py @@ -1,12 +1,17 @@ import json import os from collections import defaultdict -from typing import DefaultDict, Dict, List, Literal, Optional, Set, Tuple +from typing import DefaultDict, Dict, List, Literal, Optional, Set, Tuple, Union import ete3 from ete3 import Tree, TreeNode -from core.utils import logger, progress, read_fasta_len, yield_file_lines +from core.utils import ( + progress, + read_fasta_len, + yield_config_lines, + yield_file_lines, +) import logging @@ -82,8 +87,9 @@ def get_lineage( # cli -def parse_attributes_from_config_file( +def 
parse_attributes_from_config_data( config_f: str, + taxon_idx_mapping_file: Optional[str], ) -> Tuple[Set[str], Dict[str, str], List[str], Dict[str, Dict[str, str]]]: """ Parses attributes from a configuration file. @@ -111,13 +117,13 @@ def parse_attributes_from_config_file( - The 'TAXON' attribute is expected to be unique for each line. """ - logger.info(f"[STATUS] - Parsing config file: {config_f} ...") + logger.info(f"[STATUS] - Parsing config data ...") attributes: List[str] = [] level_by_attribute_by_proteome_id: Dict[str, Dict[str, str]] = {} proteomes: Set[str] = set() proteome_id_by_species_id: Dict[str, str] = {} - for line in yield_file_lines(config_f): + for line in yield_config_lines(config_f, taxon_idx_mapping_file): if line.startswith("#"): if not attributes: attributes = [x.strip() for x in line.lstrip("#").split(",")] @@ -274,72 +280,6 @@ def parse_tree_from_file( return tree_ete, node_idx_by_proteome_ids -# api -def parse_attributes_from_json( - json_list: List[Dict[str, str]], - taxon_idx_mapping_file: str, -) -> Tuple[Set[str], Dict[str, str], List[str], Dict[str, Dict[str, str]]]: - """ - Parses attributes from a JSON list. - - Args: - json_list List[Dict[str,str]]: JSON list of attributes. - taxon_idx_mapping_file str: The path to the taxon-idx mapping file - - Returns: - Tuple[Set[str], Dict[str, str], List[str], Dict[str, Dict[str, str]]]: A tuple containing: - - A set of proteome IDs. - - A dictionary mapping species IDs to proteome IDs. - - A list of attributes. - - A dictionary mapping proteome IDs to dictionaries, where each inner dictionary - maps attributes to their corresponding levels. - - Raises: - FileNotFoundError: If the specified configuration file is not found. - ValueError: If there are errors in the configuration file format or content. - - Note: - - The configuration file is expected to have a header line starting with '#', - where the first element is 'IDX' and the second element is 'TAXON'. - - Each subsequent non-empty line in the configuration file should contain - comma-separated values corresponding to the attributes defined in the header line. - - The 'TAXON' attribute is expected to be unique for each line. 
- """ - - logger.info("[STATUS] - Parsing JSON list...") - attributes: List[str] = [] - level_by_attribute_by_proteome_id: Dict[str, Dict[str, str]] = {} - proteomes: Set[str] = set() - proteome_id_by_species_id: Dict[str, str] = {} - - attributes = list(json_list[0].keys()) - attributes.insert(0, "IDX") - - with open(taxon_idx_mapping_file, "r") as f: - taxon_idx_mapping = json.load(f) - - attributes.insert(0, "all") - - for entry in json_list: - proteome_id = entry["TAXON"] - species_id = taxon_idx_mapping[proteome_id] - proteomes.add(proteome_id) - proteome_id_by_species_id[species_id] = proteome_id - - level_by_attribute_by_proteome_id[proteome_id] = { - attribute: entry.get(attribute, "") for attribute in attributes[1:] - } - level_by_attribute_by_proteome_id[proteome_id]["IDX"] = proteome_id - level_by_attribute_by_proteome_id[proteome_id]["all"] = "all" - attributes.insert(0, "all") - return ( - proteomes, - proteome_id_by_species_id, - attributes, - level_by_attribute_by_proteome_id, - ) - - def parse_fasta_dir(species_ids_f: str, fasta_dir: str) -> Dict[str, int]: """ Parse a species IDs file to retrieve fasta file names and then calculate diff --git a/src/core/proteins.py b/src/core/proteins.py index ac0a41c..c47439d 100644 --- a/src/core/proteins.py +++ b/src/core/proteins.py @@ -1,5 +1,5 @@ from collections import Counter -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union from core.utils import mean, median, sd @@ -72,7 +72,7 @@ def add_annotation_to_protein( def get_protein_length_stats( self, protein_ids: List[str] - ) -> Dict[str, int | float]: + ) -> Dict[str, Union[int, float]]: """ Calculate statistics (sum, mean, median, standard deviation) of protein lengths. @@ -80,7 +80,7 @@ def get_protein_length_stats( protein_ids (List[str]): List of protein IDs for which to calculate statistics. Returns: - Dict[str, int | float]: A dictionary containing the calculated statistics: + Dict[str, Union[int, float]): A dictionary containing the calculated statistics: - 'sum': Sum of lengths of proteins in the input list. - 'mean': Mean length of proteins in the input list. - 'median': Median length of proteins in the input list. diff --git a/src/core/utils.py b/src/core/utils.py index 93c4a4d..f7974fd 100644 --- a/src/core/utils.py +++ b/src/core/utils.py @@ -1,9 +1,10 @@ import gzip +import json import logging import os import sys from math import log, sqrt -from typing import Any, Generator, List, Optional, Tuple +from typing import Any, Dict, Generator, List, Optional, Tuple, Union import scipy import logging @@ -11,7 +12,7 @@ logger = logging.getLogger("kinfin_logger") -def progress(iteration: int, steps: int | float, max_value: int) -> None: +def progress(iteration: int, steps: Union[int, float], max_value: int) -> None: """ Print progress in percentage based on the current iteration, steps, and maximum value. @@ -36,7 +37,7 @@ def progress(iteration: int, steps: int | float, max_value: int) -> None: sys.stdout.flush() -def check_file(filepath: str | None, install_kinfin: bool = False) -> None: +def check_file(filepath: Optional[str], install_kinfin: bool = False) -> None: """ Check if a file exists. 
@@ -76,6 +77,32 @@ def yield_file_lines(filepath: str) -> Generator[str, Any, None]:
                 yield line.rstrip("\n")


+def yield_config_lines(
+    config_f: str,
+    taxon_idx_mapping_file: Optional[str],
+):
+    if config_f.endswith(".json"):
+        if not taxon_idx_mapping_file:
+            raise ValueError("[ERROR] - taxon_idx_mapping not present")
+
+        with open(taxon_idx_mapping_file, "r") as f_mapping, open(
+            config_f, "r"
+        ) as f_config:
+            taxon_idx_mapping = json.load(f_mapping)
+            config_data = json.load(f_config)
+            headers = ["IDX"] + list(config_data[0].keys())
+            yield "#" + ",".join(headers)
+
+            for item in config_data:
+                idx = taxon_idx_mapping[item["TAXON"]]
+                row = [idx] + [item[key] for key in headers[1:]]
+                yield ",".join(row)
+            return
+    else:
+        yield from yield_file_lines(config_f)
+        return
+
+
 def read_fasta_len(fasta_file: str) -> Generator[Tuple[str, int], Any, None]:
     """
     Generator function to parse a FASTA file and yield tuples of header and sequence length.
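
Below is a minimal sketch of how the new run_cli_command helper from src/api/endpoints.py behaves when awaited directly, mirroring the command list the /init endpoint assembles; the file paths are placeholders, and importing api.endpoints outside the running FastAPI app is an assumption of the example, not something the patch guarantees.

import asyncio

from api.endpoints import run_cli_command

# Placeholder paths; in the endpoint the real values come from session_manager.
command = [
    "python",
    "src/main.py",
    "analyse",
    "-g", "Orthogroups.txt",
    "-c", "config.json",
    "-s", "SequenceIDs.txt",
    "-m", "taxon_idx_mapping.json",
    "-o", "results/",
]

# Awaiting returns the captured stdout; a non-zero exit code raises RuntimeError.
# The endpoint itself schedules the same coroutine fire-and-forget via asyncio.create_task.
stdout = asyncio.run(run_cli_command(command))
print(stdout)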
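For reference, a sketch of the two JSON inputs that yield_config_lines in src/core/utils.py stitches back into the classic CSV-style config lines; the taxon labels, attribute names, and index values are invented for the example, and the mapping values are assumed to already be strings so that ",".join(row) works.

import json

config = [
    {"TAXON": "taxon_A", "clade": "clade_1"},
    {"TAXON": "taxon_B", "clade": "clade_2"},
]
taxon_idx_mapping = {"taxon_A": "0", "taxon_B": "1"}

with open("config.json", "w") as fh:
    json.dump(config, fh)
with open("taxon_idx_mapping.json", "w") as fh:
    json.dump(taxon_idx_mapping, fh)

# yield_config_lines("config.json", "taxon_idx_mapping.json") would then yield:
#   #IDX,TAXON,clade
#   0,taxon_A,clade_1
#   1,taxon_B,clade_2
# which parse_attributes_from_config_data consumes exactly like a plain-text config file.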