Skip to content

Commit

Permalink
fix issues with qc and lims report
Browse files Browse the repository at this point in the history
  • Loading branch information
sage-wright committed Apr 10, 2024
1 parent e0ba15e commit 421835b
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 48 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Shelby Bennett, Erin Young, Curtis Kapsak, & Kutluhan Incekara

ARG SAMTOOLS_VER="1.18"
ARG TBP_PARSER_VER="1.4.1"
ARG TBP_PARSER_VER="1.4.2"

FROM ubuntu:jammy as builder

Expand Down Expand Up @@ -42,7 +42,7 @@ ARG TBP_PARSER_VER
LABEL base.image="ubuntu:jammy"
LABEL dockerfile.version="1"
LABEL software="tbp-parser"
LABEL software.version="1.4.1"
LABEL software.version="1.4.2"
LABEL description="tbp-parser and samtools"
LABEL website="https://github.com/theiagen/tbp-parser"
LABEL license="https://github.com/theiagen/tbp-parser/blob/main/LICENSE"
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@ See also [this page](https://theiagen.notion.site/tbp-parser-b02bef0cbc814b12987
We highly recommend using the following Docker image to run tbp-parser:

```bash
docker pull us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.4.1
docker pull us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.4.2
```

The entrypoint for this Docker image is the tbp-parser help message. To run this container interactively, use the following command:

```bash
docker run -it --entrypoint=/bin/bash us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.4.1
docker run -it --entrypoint=/bin/bash us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.4.2
# Once inside the interactive container, you can run the tbp-parser tool
python3 /tbp-parser/tbp_parser/tbp_parser.py -v
# v1.4.1
# v1.4.2
```

### Locally with Python
Expand Down
23 changes: 16 additions & 7 deletions tbp_parser/LIMS.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,10 @@ def apply_lims_rules(self, gene_dictionary, DF_LIMS, max_mdl_resistance, antimic
else:
self.logger.debug("LIMS:The other mutation ({}) has higher read support ({}) than the current mutation ({}; {})".format(aa_mutations_per_gene[matching_index], read_supports[matching_index], mutation, read_supports[current_index]))
removal_list.append(mutation)


if ("This mutation is outside the expected region" in warnings[current_index]):
removal_list.append(mutation)

# remove all mutations that have lower read support or are outside the expected region
if len(removal_list) > 0:
for mutation in removal_list:
Expand Down Expand Up @@ -189,9 +192,9 @@ def apply_lims_rules(self, gene_dictionary, DF_LIMS, max_mdl_resistance, antimic
aa_mutation = ""

# do not add the mutation if it failed quality in the mutation position, has insufficient coverage, or is blank
if ("Failed quality in the mutation position" in warnings[index]) or ("Insufficient Coverage" in mdl_interpretations[index]) or (mutation == "") or ("This mutation is outside the expected region" in warnings[index]):
if ("Failed quality in the mutation position" in warnings[index]) or ("Insufficient Coverage" in mdl_interpretations[index]) or (mutation == ""):
self.logger.debug("LIMS:This mutation (\"{}\", origin gene: {}) is not being added to the LIMS report because it failed quality in the mutation position, was WT, or had insufficient locus coverage".format(mutation, gene))
if "del" in mutation and "Failed quality in the mutation position" in warnings[index]:
if "del" in mutation and "Failed quality in the mutation position" in warnings[index] and "Insufficient coverage in locus" in warnings[index]:
DF_LIMS[gene_code] = "No sequence"
else:
DF_LIMS[gene_code] = "No mutations detected"
Expand All @@ -205,10 +208,16 @@ def apply_lims_rules(self, gene_dictionary, DF_LIMS, max_mdl_resistance, antimic
all_responsible_mdl_interpretations[gene][index] = "WT"

if "del" in mutation and "Failed quality in the mutation position" in warnings[index]:
mdl_interpretations[index] = "Insufficient Coverage"
if gene in responsible_gene:
all_responsible_mdl_interpretations[gene][index] = "Insufficient Coverage"

try:
if int(globals.COVERAGE_DICTIONARY[gene]) < globals.COVERAGE_THRESHOLD:
mdl_interpretations[index] = "Insufficient Coverage"
if gene in responsible_gene:
all_responsible_mdl_interpretations[gene][index] = "Insufficient Coverage"
else:
self.logger.debug("This gene ({}) has sufficient coverage a deletion being present".format(gene))
except:
self.logger.debug("This gene ({})is not in the coverage dictionary".format(gene))

self.logger.debug("LIMS:Since this MDL interpretation changed, we are now potentially recalculating max_mdl_resistance (currently {})".format(max_mdl_resistance[0]))
if (max([globals.RESISTANCE_RANKING[interpretation] for gene_set in all_responsible_mdl_interpretations.values() for interpretation in gene_set]) != globals.RESISTANCE_RANKING[max_mdl_resistance[0]]) and gene in responsible_gene:

Expand Down
70 changes: 35 additions & 35 deletions tbp_parser/Row.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def __init__(self, logger, variant, who_confidence, drug, gene_name=None, depth=
# if self.tbprofiler_gene_name in globals.TNGS_REGIONS.keys():
# self.logger.debug("ROW:[tNGS only] This mutation's genomic position is outside the expected region.")
# self.warning.append("This mutation is outside the expected region")
try:
if self.tbprofiler_gene_name in globals.COVERAGE_DICTIONARY:
if (self.depth < globals.MIN_DEPTH) or (float(globals.COVERAGE_DICTIONARY[self.tbprofiler_gene_name]) < globals.COVERAGE_THRESHOLD):
self.logger.debug("ROW:The depth of coverage for this variant is {} and the coverage for the gene is {}; applying a locus warning".format(self.depth, globals.COVERAGE_DICTIONARY[self.tbprofiler_gene_name]))
if (float(globals.COVERAGE_DICTIONARY[self.tbprofiler_gene_name]) < globals.COVERAGE_THRESHOLD):
Expand All @@ -84,40 +84,7 @@ def __init__(self, logger, variant, who_confidence, drug, gene_name=None, depth=
self.logger.debug("ROW:This is a deletion, no warning added for the locus unless it fails positional qc (checked next)")
else:
self.warning.append("Insufficient coverage in locus")

protein_position = globals.get_position(self.tbprofiler_variant_substitution_aa)

# check to see if we need to apply a mutation warning
# (check rrs & rrl for low frequency and read support;
# also check ethA & rpoB for specific protein position frequency)
if ((self.depth < globals.MIN_DEPTH)
or (self.tbprofiler_gene_name not in ["rrs", "rrl"] and
(float(self.frequency) < globals.MIN_FREQUENCY or self.read_support < globals.MIN_READ_SUPPORT))
or (self.tbprofiler_gene_name == "rrs" and
(float(self.frequency) < globals.RRS_FREQUENCY or self.read_support < globals.RRS_READ_SUPPORT))
or (self.tbprofiler_gene_name == "rrl" and
(float(self.frequency) < globals.RRL_FREQUENCY or self.read_support < globals.RRL_READ_SUPPORT))
or (self.tbprofiler_gene_name == "ethA" and
237 in protein_position and float(self.frequency) < globals.ETHA237_FREQUENCY)
or (self.tbprofiler_gene_name == "rpoB" and
449 in protein_position and float(self.frequency) < globals.RPOB449_FREQUENCY)):
self.logger.debug("ROW:The depth of coverage for this variant is {}, the frequency is {}, and the read support is {}; applying an additional mutation position warning".format(self.depth, self.frequency, self.read_support))

if ((float(globals.COVERAGE_DICTIONARY[self.tbprofiler_gene_name]) < globals.COVERAGE_THRESHOLD) and
("del" in self.tbprofiler_variant_substitution_nt
or self.tbprofiler_gene_name in globals.GENES_WITH_DELETIONS)):
self.logger.debug("ROW:This deletion failed in the mutation position and there was insufficient coverage locus, adding insufficient coverage warning")
self.warning.append("Insufficient coverage in locus")

globals.MUTATION_FAIL_LIST.append(self.tbprofiler_variant_substitution_nt)
self.warning.append("Failed quality in the mutation position")

elif (float(globals.COVERAGE_DICTIONARY[self.tbprofiler_gene_name]) < globals.COVERAGE_THRESHOLD):
self.logger.debug("ROW:The depth of coverage for this variant is {}, the frequency is {}, and the read support is {}; no additional warning added for the mutation position".format(self.depth, self.frequency, self.read_support))

else: # all other variants, no warning added
self.warning = [""]
except:
else:
self.logger.debug("ROW:This gene does not appear in the coverage dictionary. An additional warning will be given.")
if self.tbprofiler_gene_name in globals.TNGS_REGIONS.keys():
self.logger.debug("ROW:[tNGS only] This mutation's genomic position is outside the expected region")
Expand All @@ -127,6 +94,39 @@ def __init__(self, logger, variant, who_confidence, drug, gene_name=None, depth=
self.mdl_interpretation = "NA"
else:
self.warning.append("This mutation is outside the expected region")

protein_position = globals.get_position(self.tbprofiler_variant_substitution_aa)

# check to see if we need to apply a mutation warning
# (check rrs & rrl for low frequency and read support;
# also check ethA & rpoB for specific protein position frequency)
if ((self.depth < globals.MIN_DEPTH)
or (self.tbprofiler_gene_name not in ["rrs", "rrl"] and
(float(self.frequency) < globals.MIN_FREQUENCY or self.read_support < globals.MIN_READ_SUPPORT))
or (self.tbprofiler_gene_name == "rrs" and
(float(self.frequency) < globals.RRS_FREQUENCY or self.read_support < globals.RRS_READ_SUPPORT))
or (self.tbprofiler_gene_name == "rrl" and
(float(self.frequency) < globals.RRL_FREQUENCY or self.read_support < globals.RRL_READ_SUPPORT))
or (self.tbprofiler_gene_name == "ethA" and
237 in protein_position and float(self.frequency) < globals.ETHA237_FREQUENCY)
or (self.tbprofiler_gene_name == "rpoB" and
449 in protein_position and float(self.frequency) < globals.RPOB449_FREQUENCY)):
self.logger.debug("ROW:The depth of coverage for this variant is {}, the frequency is {}, and the read support is {}; applying an additional mutation position warning".format(self.depth, self.frequency, self.read_support))

if self.tbprofiler_gene_name in globals.COVERAGE_DICTIONARY.keys():
if ((float(globals.COVERAGE_DICTIONARY[self.tbprofiler_gene_name]) < globals.COVERAGE_THRESHOLD) and
("del" in self.tbprofiler_variant_substitution_nt
or self.tbprofiler_gene_name in globals.GENES_WITH_DELETIONS)):
self.logger.debug("ROW:This deletion failed in the mutation position and there was insufficient coverage locus, adding insufficient coverage warning")
self.warning.append("Insufficient coverage in locus")

globals.MUTATION_FAIL_LIST.append(self.tbprofiler_variant_substitution_nt)
self.warning.append("Failed quality in the mutation position")

else:
self.logger.debug("ROW:The depth of coverage for this variant is {}, the frequency is {}, and the read support is {}; no additional warning added for the mutation position".format(self.depth, self.frequency, self.read_support))
if len(self.warning) == 0:
self.warning = [""]

self.logger.debug("ROW:This variant has the following warnings: {}".format(self.warning))

Expand Down
2 changes: 1 addition & 1 deletion tbp_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__VERSION__ = "v1.4.1"
__VERSION__ = "v1.4.2"

0 comments on commit 421835b

Please sign in to comment.