SwiftSeal · Jan 16, 2025
diff --git a/README.md b/README.md
@@ -36,7 +36,7 @@ To use these with - for example - `singularity`, simply run:
 If you are having issues with `conda`, you can instead try installing directly from the repository:
 
 ```sh
-pip install https://github.com/SwiftSeal/resistify/archive/refs/tags/v1.1.2.tar.gz
+pip install https://github.com/SwiftSeal/resistify/archive/refs/tags/v1.1.3.tar.gz
 ```
 
 Note that `resistify` requires `hmmer` to be installed and available in your system's PATH, which will not be installed automatically when using `pip`.

diff --git a/assets/resistify.ipynb b/assets/resistify.ipynb
@@ -10,7 +10,7 @@
     "Google Colab is quite limited in the resources that it gives, so it will be a lot slower than running locally.\n",
     "\n",
     "In this example, we'll be downloading proteins from the potato genome and taking a random selection of ~1000.\n",
-    "If you'd like to use your own sequences, you can use the bar on the left to upload a file of your choice, modify the code to use your respective fasta file.\n",
+    "If you'd like to use your own sequences, you can use the bar on the left to upload a file of your choice, and modify the code to use your respective fasta file.\n",
     "\n",
     "To get started, use the bar at the top to start a session.\n",
     "You'll want to select a GPU instance so that the PRR pipeline runs quickly!\n",

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "resistify"
-version = "1.1.2"
+version = "1.1.3"
 dependencies = [
   "scikit-learn>=0.24.2",
   "numpy",

diff --git a/resistify/__version__.py b/resistify/__version__.py
@@ -1 +1 @@
-__version__ = "1.1.2"
+__version__ = "1.1.3"
diff --git a/resistify/coconat.py b/resistify/coconat.py
@@ -10,7 +10,7 @@
 import tempfile
 import warnings
 from resistify._loguru import logger
-from resistify.utility import log_percentage
+from resistify.utility import ProgressLogger
 
 logging.getLogger("transformers").setLevel(logging.CRITICAL)
 
@@ -176,8 +176,7 @@ def coconat(sequences, models_path: str):
     output_file = os.path.join(temp_dir.name, "out")
     prefix_path = os.path.join(temp_dir.name, "crf")
 
-    total_iterations = len(sequences)
-    iteration = 0
+    progress_logger = ProgressLogger(len(sequences))
     for sequence in sequences:
         logger.debug(f"Processing {sequence.id}...")
 
@@ -244,7 +243,6 @@ def coconat(sequences, models_path: str):
         probability_matrix = np.loadtxt(f"{prefix_path}_0")
         cc_probability = 1 - probability_matrix[:, 0]
         sequence.cc_probs = cc_probability
-        iteration += 1
-        log_percentage(iteration, total_iterations)
+        progress_logger.update()
 
     return sequences
diff --git a/resistify/nlrexpress.py b/resistify/nlrexpress.py
@@ -9,7 +9,7 @@
 from threadpoolctl import threadpool_limits
 import shutil
 import warnings
-from resistify.utility import log_percentage
+from resistify.utility import ProgressLogger
 from resistify._loguru import logger
 
 # Version 1.3 of sklearn introduced InconsistentVersionWarning, fall back to UserWarning if not available
@@ -149,8 +149,7 @@ def nlrexpress(sequences, search_type, chunk_size, threads):
 
     logger.info("Running NLRexpress - this could take a while...")
 
-    iterations = 0
-    total_iterations = len(batches)
+    progress_logger = ProgressLogger(len(batches))
     results = []
     with concurrent.futures.ProcessPoolExecutor(
         max_workers=-(-threads // 2)
@@ -161,8 +160,7 @@ def nlrexpress(sequences, search_type, chunk_size, threads):
         ]
 
         for future in concurrent.futures.as_completed(futures):
-            iterations += 1
-            log_percentage(iterations, total_iterations)
+            progress_logger.update()
             for sequence in future.result():
                 results.append(sequence)
 

diff --git a/resistify/tmbed.py b/resistify/tmbed.py
@@ -20,7 +20,7 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from resistify.utility import log_percentage
+from resistify.utility import ProgressLogger
 import logging
 import warnings
 from resistify._loguru import logger
@@ -452,8 +452,7 @@ def tmbed(sequences, models_path):
         "o": "outside",
     }
 
-    total_iterations = len(sequences)
-    iteration = 0
+    progress_logger = ProgressLogger(len(sequences))
     for sequence in sequences:
         try:
             logger.debug(f"Predicting transmembrane domains for {sequence.id}...")
@@ -464,11 +463,6 @@ def tmbed(sequences, models_path):
             )
             continue
 
-        # CPU alternative, implement fallback?
-        # encoder.to_cpu()
-        # torch.cuda.empty_cache()
-        # embeddings = encoder.embed(sequences)
-
         embedding = embedding.to(device=device)
         embedding = embedding.to(dtype=torch.float32)
 
@@ -511,7 +505,6 @@ def tmbed(sequences, models_path):
             len(sequence.seq),
         )
 
-        iteration += 1
-        log_percentage(iteration, total_iterations)
+        progress_logger.update()
 
     return sequences
diff --git a/resistify/utility.py b/resistify/utility.py
@@ -9,13 +9,30 @@
 from resistify._loguru import logger
 
 
-def log_percentage(n, total):
-    if total < 10:
-        logger.info(f"{n} of {total} complete")
-    else:
-        percent_complete = int((n / total) * 100)
-        if percent_complete % 10 == 0:
-            logger.info(f"{percent_complete}% complete")
+class ProgressLogger:
+    def __init__(self, total_count):
+        self.total_count = total_count
+        self.current_count = 0
+        self.last_reported_percent = -1  # -1 sits below every real percentage, so the first milestone always logs
+
+    def update(self):
+        self.current_count += 1
+        if self.total_count < 10:
+            # Fewer than 10 items: log every step as "n of total" rather than a percentage
+            logger.info(f"{self.current_count} of {self.total_count} complete")
+        else:
+            # Truncated whole-number percent; logged only at each new multiple of 10
+            percent_complete = int((self.current_count / self.total_count) * 100)
+            if self.current_count == self.total_count:
+                logger.info("100% complete")
+                self.last_reported_percent = 100
+            elif (
+                percent_complete % 10 == 0
+                and percent_complete > 0
+                and percent_complete > self.last_reported_percent
+            ):
+                logger.info(f"{percent_complete}% complete")
+                self.last_reported_percent = percent_complete
 
 
 def create_output_directory(outdir):