Skip to content

Commit

Permalink
Merge branch 'master' into aviti-manifest-dev
Browse files Browse the repository at this point in the history
  • Loading branch information
kedhammar authored Nov 4, 2024
2 parents 2bc45ad + a3d1b58 commit 4aa9dd0
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 226 deletions.
20 changes: 18 additions & 2 deletions VERSIONLOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,24 @@
# Scilifelab_epps Version Log

## 20241014.1
## 20241104.2

For AVITI manifest generation: make PhiX manifest variant, fix udf typo, remove unused func, clarify var names
For AVITI manifest generation: make PhiX manifest variant, fix udf typo, remove unused func, clarify var names, add cases to reverse-complement Index2.

## 20241104.1

Suspected bugfix for BA parsing script.

## 20241025.1

Support MiSeq V2 Micro

## 20241016.1

Remove index orientation checker

## 20241015.1

Improve project validator EPP

## 20241011.1

Expand Down
5 changes: 5 additions & 0 deletions scripts/illumina_run_parameter_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,11 @@ def lims_for_miseq(process, run_dir):
and runParameters["Setup"]["NumTilesPerSwath"] == "2"
):
process.udf["Run Type"] = "Version2Nano"
elif (
runParameters["Setup"]["SupportMultipleSurfacesInUI"] == "true"
and runParameters["Setup"]["NumTilesPerSwath"] == "4"
):
process.udf["Run Type"] = "Version2Micro"
else:
process.udf["Run Type"] = "null"
# Runs with single read return a dict object
Expand Down
207 changes: 1 addition & 206 deletions scripts/index_distance_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@
import sys
from argparse import ArgumentParser

import psycopg2
import yaml
from genologics.config import BASEURI, PASSWORD, USERNAME
from genologics.entities import Process, Project
from genologics.entities import Process
from genologics.lims import Lims

from data.Chromium_10X_indexes import Chromium_10X_indexes
Expand Down Expand Up @@ -79,209 +78,6 @@ def verify_indexes(data):
return message


# Maps an orientation flag to the warning template reported for a pool.
# Flag names and message wording are preserved exactly from the original.
_ORIENTATION_MESSAGES = {
    "Index1_RC": "Seems that Index 1 needs to be converted to RC for pool {pool}",
    "Index2_RC": "Seems that Index 2 needs to be converted to RC for pool {pool}",
    "Index1_and_Index2_RC": "Seems that both Index 1 and Index 2 need to be converted to RC for pool {pool}",
    "Index1_and_Index2_Swapped": "Seems that Index 1 and Index 2 are swapped for pool {pool}",
    "Index1_and_Index2_Swapped_plus_Index1_RC": "Seems that Index 1 and Index 2 are swapped, and Index 1 needs to be converted to RC for pool {pool}",
    "Index1_and_Index2_Swapped_plus_Index2_RC": "Seems that Index 1 and Index 2 are swapped, and Index 2 needs to be converted to RC for pool {pool}",
    "Index1_and_Index2_Swapped_plus_Index1_and_Index2_RC": "Seems that Index 1 and Index 2 are swapped, and both Index 1 and Index 2 need to be converted to RC for pool {pool}",
}


def _classify_orientation(sample, index1, index2):
    """Classify how a sample's (idx1, idx2) pair relates to a reference
    index pair (index1, index2).

    Returns one of the flag strings used as keys of _ORIENTATION_MESSAGES,
    "CORRECT" for an exact match, or "" when nothing matches.
    """
    idx1 = sample["idx1"]
    idx2 = sample["idx2"]
    if idx1 == index1 and idx2 == index2:
        return "CORRECT"
    if idx1 == rc(index1) and idx2 == index2:
        return "Index1_RC"
    if idx1 == index1 and idx2 == rc(index2):
        return "Index2_RC"
    if idx1 == rc(index1) and idx2 == rc(index2):
        return "Index1_and_Index2_RC"
    if idx1 == index2 and idx2 == index1:
        return "Index1_and_Index2_Swapped"
    if idx1 == rc(index2) and idx2 == index1:
        return "Index1_and_Index2_Swapped_plus_Index1_RC"
    if idx1 == index2 and idx2 == rc(index1):
        return "Index1_and_Index2_Swapped_plus_Index2_RC"
    if idx1 == rc(index2) and idx2 == rc(index1):
        return "Index1_and_Index2_Swapped_plus_Index1_and_Index2_RC"
    return ""


def verify_orientation(data):
    """Verify the orientation of Index 1 / Index 2 for each pool in *data*.

    For every pool, the first and last samples (sorted by sample name, with
    special indexes excluded) are compared against the known 8nt/10nt index
    sets stored in the LIMS database. Returns a list of warning strings;
    an empty list means all pools look correctly oriented.
    """
    message = []
    connection = psycopg2.connect(
        user=config["username"],
        host=config["url"],
        database=config["db"],
        password=config["password"],
    )
    cursor = connection.cursor()
    # Parameterized LIKE query: the "%" wildcard is appended to the set-name
    # prefix at execution time instead of being spliced in via str.format,
    # matching the parameterization style used in project_validator.py.
    query = (
        "select reagenttype.name from reagenttype "
        "where reagenttype.name like %s;"
    )
    # We only search against part of the index sets that exist in LIMS
    index_sets_10nt = [
        "AmpliconUD_UDP_",
        "IDT_10nt_UD_",
        "NexteraUD_UDP_",
        "QIAseq_UX_UDI_",
        "v2_IDT_10nt_UD_",
        "v3_Illumina_10nt_UD_",
        "xGen_Normalase_10nt_UDI_",
        "%_SI-NN-",
        "%_SI-NT-",
        "%_SI-TN-",
        "%_SI-TS-",
        "%_SI-TT-",
    ]
    index_sets_8nt = [
        "NexteraCD",
        "NexteraXT_",
        "Nextera16S_",
        "Nextera FS Dual",
        "SMARTerDNA_",
        "SMARTerV2_",
        "SMARTer_RNA_UD_",
        "Swift_SNAP_",
        "TAKARA_8nt_UDI_",
        "TruSeqUDv2-UDI_",
        "v2_Illumina_TruSeq_8nt_UD_",
        "v2_NexteraXT_",
        "xGen_8nt_UDI_",
    ]
    try:
        pools = {x["pool"] for x in data}
        for p in sorted(pools):
            subset = [
                i for i in data if i["pool"] == p and not is_special_idx(i["idx_name"])
            ]
            if not subset:
                continue
            subset = sorted(subset, key=lambda d: d["sn"])
            if NGISAMPLE_PAT.findall(subset[0].get("sn", "")):
                project_id = subset[0]["sn"].split("_")[0]
                project_info = Project(lims, id=project_id)
                seq_platform = project_info.udf.get("Sequencing platform")
            else:
                # The error message is skipped here since the verify_samplename
                # function will check the names of all samples
                seq_platform = ""
            # Determine the (single) index length used by the pool to pick
            # which index sets to search
            idx1_len = list({len(i["idx1"]) for i in subset if i["idx1"]})
            idx2_len = list({len(i["idx2"]) for i in subset if i["idx2"]})
            if len(idx1_len) == len(idx2_len) == 1 and idx1_len[0] == idx2_len[0] == 8:
                search_index_sets = index_sets_8nt
            elif (
                len(idx1_len) == len(idx2_len) == 1 and idx1_len[0] == idx2_len[0] == 10
            ):
                search_index_sets = index_sets_10nt
            else:
                message.append(
                    f"Unable to check index orientations due to index length for pool {p}"
                )
                continue
            # Search through the index sets for the first and last samples in the pool to save time
            flag_idx_search = False
            for idx_set in search_index_sets:
                cursor.execute(query, (idx_set + "%",))
                query_output = cursor.fetchall()
                flag_first_sample = ""
                flag_last_sample = ""
                for out in query_output:
                    index1 = IDX_PAT.findall(out[0])[0][0]
                    index2 = IDX_PAT.findall(out[0])[0][1]
                    # Convert index 2 to RC for MiSeq projects
                    if seq_platform and "MISEQ" in seq_platform.upper():
                        index2 = rc(index2)
                    # Check the first sample; keep any earlier match if this
                    # reference pair does not match (original semantics)
                    first_flag = _classify_orientation(subset[0], index1, index2)
                    if first_flag:
                        flag_first_sample = first_flag
                    # Check the last sample. Bugfix: the original compared
                    # subset[0] in the four swapped cases; the last sample is
                    # subset[-1] throughout.
                    last_flag = _classify_orientation(subset[-1], index1, index2)
                    if last_flag:
                        flag_last_sample = last_flag
                # Make a conclusion
                if flag_first_sample == flag_last_sample == "CORRECT":
                    flag_idx_search = True
                    break
                elif (
                    flag_first_sample == flag_last_sample
                    and flag_first_sample in _ORIENTATION_MESSAGES
                ):
                    message.append(
                        _ORIENTATION_MESSAGES[flag_first_sample].format(pool=p)
                    )
                    flag_idx_search = True
                    break
                elif flag_first_sample != flag_last_sample:
                    message.append(f"Inconsistent Index pattern detected for pool {p}")
                    flag_idx_search = True
                    break
                # Neither sample matched this index set: try the next one
            if not flag_idx_search:
                message.append(
                    f"Unable to find matched index set to check orientation for pool {p}"
                )
    finally:
        # Close connections even if a query raises
        cursor.close()
        connection.close()

    return message


def is_special_idx(idx_name):
if (
TENX_DUAL_PAT.findall(idx_name)
Expand Down Expand Up @@ -612,7 +408,6 @@ def main(lims, pid, auto):
message += verify_placement(data)
message += verify_indexes(data)
message += verify_samplename(data)
message += verify_orientation(data)
else:
message = check_index_distance(data)
warning_start = "**Warnings from Verify Indexes and Placement EPP: **\n"
Expand Down
3 changes: 3 additions & 0 deletions scripts/manage_demux_stats_thresholds.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(self, instrument, chemistry, paired, read_length):
"Version3",
"Version2",
"Version2Nano",
"Version2Micro",
"SP",
"S1",
"S2",
Expand Down Expand Up @@ -131,6 +132,8 @@ def set_exp_lane_clust(self):
self.exp_lane_clust = 10e6
elif self.chemistry == "Version2Nano":
self.exp_lane_clust = 750000
elif self.chemistry == "Version2Micro":
self.exp_lane_clust = 3000000
else:
if self.read_length >= 76 and self.read_length <= 301:
self.exp_lane_clust = 18e6
Expand Down
4 changes: 2 additions & 2 deletions scripts/parse_ba_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,9 @@ def main(lims, args):
xml_query, return_type = udf_to_xml[udf_name]

result = xml_results.find(f".//{xml_query}").text.strip()
if isinstance(return_type, int):
if return_type is int:
result = int(round(float(result), 0))
elif isinstance(return_type, float):
elif return_type is float:
result = float(result)

try:
Expand Down
31 changes: 15 additions & 16 deletions scripts/project_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,23 @@
# Verify sample IDs
def verify_sample_ids(project_id):
message = []
connection = psycopg2.connect(
user=config["username"],
host=config["url"],
database=config["db"],
password=config["password"],
)
cursor = connection.cursor()
# Query sample names with given project luid
query = (
"select sample.name from sample "
"inner join project on sample.projectid=project.projectid "
"where project.luid = '{}';"
"where project.luid = %s;"
)
cursor.execute(query.format(project_id))
query_output = cursor.fetchall()
with psycopg2.connect(
user=config["username"],
host=config["url"],
database=config["db"],
password=config["password"],
) as connection:
with connection.cursor() as cursor:
cursor.execute(query, (project_id,))
query_output = cursor.fetchall()

# Validate sample name format
for out in query_output:
sample_id = out[0]
if not NGISAMPLE_PAT.findall(sample_id):
Expand All @@ -49,11 +51,6 @@ def verify_sample_ids(project_id):
f"SAMPLE NAME WARNING: Sample ID {sample_id} does not match project ID {project_id}"
)

# Close connections
if connection:
cursor.close()
connection.close()

return message


Expand All @@ -64,7 +61,9 @@ def main(lims, pid):
# Validate sample IDs
message += verify_sample_ids(project.id)

if message:
if not message:
print(f"No issue detected for project {pid}")
else:
sys.stderr.write("; ".join(message))
sys.exit(2)

Expand Down

0 comments on commit 4aa9dd0

Please sign in to comment.