From 232e8e61d051f4f12785a5c2e2942af7034ac7a6 Mon Sep 17 00:00:00 2001 From: Sage Wright Date: Mon, 23 Dec 2024 16:42:39 +0000 Subject: [PATCH] updates lims ID --- Dockerfile | 2 +- docs/inputs/theiaprok.md | 2 +- docs/usage.md | 4 +-- docs/versioning/exhaustive.md | 3 +- tbp_parser/LIMS.py | 65 ++++++++++++++++++----------------- tbp_parser/__init__.py | 2 +- 6 files changed, 41 insertions(+), 37 deletions(-) diff --git a/Dockerfile b/Dockerfile index 675c834..0245d95 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ # Shelby Bennett, Erin Young, Curtis Kapsak, & Kutluhan Incekara ARG SAMTOOLS_VER="1.18" -ARG TBP_PARSER_VER="2.2.2" +ARG TBP_PARSER_VER="2.3.0" FROM ubuntu:jammy AS builder diff --git a/docs/inputs/theiaprok.md b/docs/inputs/theiaprok.md index 4d5c6ee..46b74e8 100644 --- a/docs/inputs/theiaprok.md +++ b/docs/inputs/theiaprok.md @@ -29,7 +29,7 @@ The following optional inputs are also available for user modification on Terra: | `merlin_magic` | **tbp_parser_coverage_regions_bed** | File | A BED file containing the regions to calculate percent coverage for | [tbdb-modified-regions.md](https://github.com/theiagen/tbp-parser/blob/main/data/tbdb-modified-regions.bed) | | `merlin_magic` | **tbp_parser_coverage_threshold** | Int | The minimum percentage of a region that has depth above the threshold set by `min_depth` (used for a gene/locus to pass QC) | 100 | | `merlin_magic` | **tbp_parser_debug** | Boolean | Set to `false` to turn off debug mode for `tbp-parser` | `true` | -| `merlin_magic` | **tbp_parser_docker_image** | String | The Docker image to use when running `tbp-parser` | "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.2.2" | +| `merlin_magic` | **tbp_parser_docker_image** | String | The Docker image to use when running `tbp-parser` | "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.3.0" | | `merlin_magic` | **tbp_parser_etha237_frequency** | Float | Minimum frequency for a mutation in ethA at protein position 237 to 
pass QC in `tbp-parser` | 0.1 | | `merlin_magic` | **tbp_parser_expert_rule_regions_bed** | File | A file that contains the regions where R mutations and expert rules are applied | | | `merlin_magic` | **tbp_parser_min_depth** | Int | Minimum depth for a variant to pass QC in tbp_parser | 10 | diff --git a/docs/usage.md b/docs/usage.md index a6e8f8b..5013832 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -9,7 +9,7 @@ title: Getting Started We highly recommend using the following Docker iamge to run tbp-parser: ``` bash -docker pull us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.2.2 #(1)! +docker pull us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.3.0 #(1)! ``` 1. We host our Docker images on the Google Artifact Registry so that they are always availble for usage. @@ -21,7 +21,7 @@ docker run -it --entrypoint=/bin/bash us-docker.pkg.dev/general-theiagen/theiage # Once inside the container interactively, you can run the tbp-parser tool python3 /tbp-parser/tbp_parser/tbp_parser.py -v -# v2.2.2 +# v2.3.0 ``` ### Locally with Python diff --git a/docs/versioning/exhaustive.md b/docs/versioning/exhaustive.md index 1df0d9f..ff80ebd 100644 --- a/docs/versioning/exhaustive.md +++ b/docs/versioning/exhaustive.md @@ -68,7 +68,8 @@ The following is a list of every version of `tbp-parser` and a short summary of - v2.2.0 - removes ciprofloxacin, fluoroquinolones, and ofloxacin from gyrA and gyrB and aminoglycosides from rrs in the `globals.GENE_TO_ANTIMICROBIAL_DRUG_NAME` dictionary; if a drug is missing in the TBProfiler JSON's gene_associated_drug field that is present in that global dictionary, it will be added for the mutation. - v2.2.1 - fixes a bug where rifampicin was not renamed to rifampin, which caused duplicate lines to appear in the Laboratorian report. 
- v2.2.2 - removes the high-level and low-level resistance comments from the LIMS report - +- v2.3.0 - reworks the lineage detection so that if TBProfiler detects a lineage, it is reported; if TBProfiler reports no lineage, M. tuberculosis complex is reported as detected only when the proportion of LIMS genes meeting the coverage threshold is at least 0.7 (this cutoff was previously 0.9 for WGS data) + --- The following diagram shows how each version is related to the others without technical details: diff --git a/tbp_parser/LIMS.py b/tbp_parser/LIMS.py index 039a20d..a85841a 100644 --- a/tbp_parser/LIMS.py +++ b/tbp_parser/LIMS.py @@ -28,18 +28,18 @@ def get_id(self): """ self.logger.info("LIMS:Within LIMS class get_id function") + # if the percentage of genes above the coverage threshold is greater than 70%, then we can call the lineage if TBProfiler did not designate it + percentage_limit = 0.7 + # calculate percentage of genes in the LIMS report above the coverage threshold self.logger.debug("LIMS:Calculating the percentage of LIMS genes above the coverage threshold") if self.tngs: number_of_lims_genes_above_coverage_threshold = sum(int(globals.COVERAGE_DICTIONARY[gene]) >= 90 for gene in globals.COVERAGE_DICTIONARY.keys()) percentage_lims_genes_above = number_of_lims_genes_above_coverage_threshold / len(globals.COVERAGE_DICTIONARY.keys()) - # if the percentage of genes above the coverage threshold is greater than 70%, then we can call the lineage - percentage_limit = 0.7 + else: number_of_lims_genes_above_coverage_threshold = sum(int(globals.COVERAGE_DICTIONARY[gene]) >= globals.COVERAGE_THRESHOLD for gene in globals.GENES_FOR_LIMS) percentage_lims_genes_above = number_of_lims_genes_above_coverage_threshold / len(globals.GENES_FOR_LIMS) - # if the percentage of genes above the coverage threshold is greater than 90%, then we can call the lineage - percentage_limit = 0.9 self.logger.debug("LIMS:The percentage of LIMS genes above the coverage threshold is {}".format(percentage_lims_genes_above)) @@ -54,37 
+54,40 @@ def get_id(self): self.logger.debug("LIMS:The detected lineage is: '{}', and the detected sublineage is: '{}'".format(detected_lineage, detected_sublineage)) sublineages = detected_sublineage.split(";") - if percentage_lims_genes_above >= percentage_limit: - self.logger.debug("LIMS:Percentage of LIMS genes above the coverage threshold is GREATER than 90%") - if self.tngs: - self.logger.debug("LIMS:The sequencing method is tNGS; now checking for a His57Asp mutation in pncA") - pncA_mutations = globals.DF_LABORATORIAN[(globals.DF_LABORATORIAN["tbprofiler_gene_name"] == "pncA")] - if "p.His57Asp" in pncA_mutations["tbprofiler_variant_substitution_aa"].tolist(): - self.logger.debug("LIMS:p.His57Asp detected in pncA, lineage is likely M. bovis") - lineage.add("DNA of Mycobacterium bovis detected") - else: - self.logger.debug("LIMS:p.His57Asp not detected in pncA, lineage is likely M. tuberculosis") - lineage.add("DNA of Mycobacterium tuberculosis complex detected (not M. bovis)") + if self.tngs: + self.logger.debug("LIMS:The sequencing method is tNGS; now checking for a His57Asp mutation in pncA") + pncA_mutations = globals.DF_LABORATORIAN[(globals.DF_LABORATORIAN["tbprofiler_gene_name"] == "pncA")] + if "p.His57Asp" in pncA_mutations["tbprofiler_variant_substitution_aa"].tolist(): + self.logger.debug("LIMS:p.His57Asp detected in pncA, lineage is likely M. bovis") + lineage.add("DNA of Mycobacterium bovis detected") + else: + self.logger.debug("LIMS:p.His57Asp not detected in pncA, lineage is likely M. tuberculosis") + lineage.add("DNA of Mycobacterium tuberculosis complex detected (not M. 
bovis)") + + else: + self.logger.debug("LIMS:The sequencing method is WGS; now checking the TBProfiler lineage calls") + if "lineage" in detected_lineage: + lineage.add("DNA of Mycobacterium tuberculosis species detected") + + for sublineage in sublineages: + if "BCG" in detected_lineage or "BCG" in sublineage: + lineage.add("DNA of Mycobacterium bovis BCG detected") + + elif ("La1" in detected_lineage or "La1" in sublineage) or ("bovis" in detected_lineage or "bovis" in sublineage): + lineage.add("DNA of Mycobacterium bovis (not BCG) detected") + + if len(lineage) == 0: + if (percentage_lims_genes_above >= percentage_limit): + self.logger.debug("LIMS:Percentage of LIMS genes above the coverage threshold is GREATER than 90% AND no lineage has been detected") + self.logger.debug("LIMS:TBProfiler was likely unable to determine the lineage, but since percentage_lims_genes_above >= percentage_limit, we will assume M.tb") - else: - self.logger.debug("LIMS:The sequencing method is WGS; now checking the TBProfiler lineage calls") - if "lineage" in detected_lineage: - lineage.add("DNA of Mycobacterium tuberculosis species detected") - - for sublineage in sublineages: - if "BCG" in detected_lineage or "BCG" in sublineage: - lineage.add("DNA of Mycobacterium bovis BCG detected") - - elif ("La1" in detected_lineage or "La1" in sublineage) or ("bovis" in detected_lineage or "bovis" in sublineage): - lineage.add("DNA of Mycobacterium bovis (not BCG) detected") - if detected_lineage == "" or detected_lineage == "NA" or len(lineage) == 0: lineage.add("DNA of Mycobacterium tuberculosis complex detected") - - else: - self.logger.debug("LIMS:Percentage of LIMS genes above the coverage threshold is LESS than 90%") - lineage.add("DNA of Mycobacterium tuberculosis complex NOT detected") + + else: + self.logger.debug("LIMS:Percentage of LIMS genes above the coverage threshold is LESS than 90% AND no lineage has been detected") + lineage.add("DNA of Mycobacterium tuberculosis complex NOT 
detected") lineage = "; ".join(sorted(lineage)) diff --git a/tbp_parser/__init__.py b/tbp_parser/__init__.py index 424bd53..967477f 100644 --- a/tbp_parser/__init__.py +++ b/tbp_parser/__init__.py @@ -1 +1 @@ -__VERSION__ = "v2.2.0" \ No newline at end of file +__VERSION__ = "v2.3.0" \ No newline at end of file