SwiftSeal · SwiftSeal · Jan 6, 2025 · Dec 19, 2024 · Dec 19, 2024 · Dec 19, 2024
diff --git a/README.md b/README.md
@@ -6,7 +6,6 @@
 ![Conda Downloads](https://img.shields.io/conda/dn/bioconda/resistify)
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/swiftseal/resistify/blob/main/assets/resistify.ipynb)
 
-*More than 2,500 downloads - thank you all!*
 </div>
 
 Resistify is a program which rapidly identifies and classifies plant resistance genes from protein sequences.
@@ -36,7 +35,7 @@ To use these with - for example - `singularity`, simply run:
 If you are having issues with `conda`, you can instead try installing directly from the repository:
 
 ```sh
-pip install https://github.com/SwiftSeal/resistify/archive/refs/tags/v0.6.2.tar.gz
+pip install https://github.com/SwiftSeal/resistify/archive/refs/tags/v1.0.1.tar.gz
 ```
 
 Note that `resistify` requires `hmmer` to be installed and available in your system's PATH, which will not be installed automatically when using `pip`.
@@ -116,9 +115,9 @@ Approximately 13G of disk space is required.
 
 ### results.tsv (nlr)
 
-| Sequence | Length | Motifs | Domains | Classification | NBARC_motifs | MADA | MADAL | CJID |
-| --- | --- | --- | --- | --- | --- | --- | --- | --- |
-| ZAR1 | 852 | CNNNNNNNNNLLLLLLLLLL | mCNL | CNL | 9 | False | True | False |
+| Sequence | Length | LRR_Length | Motifs | Domains | Classification | NBARC_motifs | MADA | MADAL | CJID |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| ZAR1 | 852 | 307 | CNNNNNNNNNLLLLLLLLLL | mCNL | CNL | 9 | False | True | False |
 
 The main column of interest is "Classification", where we can see that it has been identified as a canonical CNL.
 The "Motifs" column indicates the series of NLR-associated motifs identified across the sequence - this can be useful if an NLR has an undetermined or unexpected classification.
@@ -127,9 +126,9 @@ Here, it appears that ZAR1 has a MADA-like motif.
 
 ### results.tsv (prr)
 
-| Sequence | Length | Type | Classification | Signal_peptide |
-| --- | --- | --- | --- | --- |
-| fls2 | 1174 | RLK | LRR | True |
+| Sequence | Length | Extracellular_Length | LRR_Length | Type | Classification | Signal_peptide |
+| --- | --- | --- | --- | --- | --- | --- |
+| fls2 | 1173 | 806 | 675 | RLK | LRR | True |
 
 For PRRs, sequences can be of the type RLP or RLK - both are single pass transmembrane proteins, and RLKs have an internal kinase domain.
 Classification refers to the domains identified in the external region.

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "resistify"
-version = "1.0.0"
+version = "1.0.1"
 dependencies = [
   "scikit-learn>=0.24.2",
   "numpy",
@@ -14,6 +14,7 @@ dependencies = [
   "fair-esm",
   "transformers",
   "sentencepiece",
+  "threadpoolctl",
 ]
 authors = [
   { name="Moray Smith", email="[email protected]" },

diff --git a/resistify/__version__.py b/resistify/__version__.py
@@ -1 +1 @@
-__version__ = "1.0.0"
+__version__ = "1.0.1"
diff --git a/resistify/main.py b/resistify/main.py
@@ -68,6 +68,12 @@ def add_common_args(parser):
         default=None,
         type=int,
     )
+    parser.add_argument(
+        "-t", "--threads",
+        help="Number of threads available for nlrexpress. Default is the number of available CPUs.",
+        default=None,
+        type=int,
+    )
 
 
 def validate_input_file(filepath):
@@ -199,7 +205,7 @@ def nlr(args, log):
     else:
         chunksize = args.chunksize
 
-    sequences = nlrexpress(sequences, "all", chunksize)
+    sequences = nlrexpress(sequences, "all", chunksize, args.threads)
 
     if args.coconat:
         log.info("Running CoCoNat to identify additional CC domains...")
@@ -234,7 +240,7 @@ def prr(args, log):
     sequences = [sequence for sequence in sequences if sequence.is_rlp()]
     if len(sequences) > 0:
         log.info(f"{len(sequences)} PRRs identified...")
-        sequences = nlrexpress(sequences, "lrr", chunksize)
+        sequences = nlrexpress(sequences, "lrr", chunksize, args.threads)
 
         log.info("Classifying PRRs...")
         for sequence in sequences:

diff --git a/resistify/nlrexpress.py b/resistify/nlrexpress.py
@@ -6,6 +6,7 @@
 import logging
 import tempfile
 from multiprocessing import Pool, cpu_count, get_context
+from threadpoolctl import threadpool_limits
 import shutil
 import warnings
 from resistify.utility import log_percentage
@@ -130,11 +131,12 @@ def parse_jackhmmer(file, iteration=False):
     return hmm_dict
 
 
-def nlrexpress(sequences, search_type, chunk_size):
-    try:
-        threads = len(os.sched_getaffinity(0))
-    except AttributeError:
-        threads = cpu_count()
+def nlrexpress(sequences, search_type, chunk_size, threads):
+    if threads is None:
+        try:
+            threads = len(os.sched_getaffinity(0))
+        except AttributeError:
+            threads = cpu_count()
 
     models = load_models(search_type)
 
@@ -273,7 +275,8 @@ def nlrexpress_subprocess(params):
 
         matrix = np.array(matrix, dtype=float)
 
-        result = model.predict_proba(matrix)
+        with threadpool_limits(limits=2):
+            result = model.predict_proba(matrix)
 
         result_index = 0
         for sequence in sequences: