From 97dda5a5d37561fe5f946bc792c36273877bb445 Mon Sep 17 00:00:00 2001 From: Corey Hayford Date: Wed, 20 Dec 2023 15:06:08 +0000 Subject: [PATCH 1/6] Updating file name matching patterns for hotfix --- .../cellranger/count/templates/cellranger_count.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/cellranger/count/templates/cellranger_count.py b/modules/nf-core/cellranger/count/templates/cellranger_count.py index 4bfb9f4f..c8165ead 100644 --- a/modules/nf-core/cellranger/count/templates/cellranger_count.py +++ b/modules/nf-core/cellranger/count/templates/cellranger_count.py @@ -31,14 +31,21 @@ def chunk_iter(seq, size): fastq_all = Path("./fastq_all") fastq_all.mkdir(exist_ok=True) +# FIXME: Generalize this filename pattern once we better understand it +""" +Old file name matching from NF-core # Match R1 in the filename, but only if it is followed by a non-digit or non-character # match "file_R1.fastq.gz", "file.R1_000.fastq.gz", etc. but # do not match "SRR12345", "file_INFIXR12", etc -filename_pattern = r"([^a-zA-Z0-9])R1([^a-zA-Z0-9])" +# filename_pattern = r"([^a-zA-Z0-9])R1([^a-zA-Z0-9])" +""" +filename_pattern = r"(.+)_1" for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2)): # double escapes are required because nextflow processes this python 'template' - if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name: + # FIXME: Generalize this pattern switching function once we standardize the filename pattern + # if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name: + if re.sub(filename_pattern, r"\1_2", r1.name) != r2.name: raise AssertionError( dedent( f"""\ From 176d0998566c35639530aa4d13b2461f3a72fd4c Mon Sep 17 00:00:00 2001 From: Corey Hayford Date: Wed, 20 Dec 2023 15:32:39 +0000 Subject: [PATCH 2/6] Adding some print statements to see why it's working on my machine but not remote --- modules/nf-core/cellranger/count/templates/cellranger_count.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/nf-core/cellranger/count/templates/cellranger_count.py b/modules/nf-core/cellranger/count/templates/cellranger_count.py index c8165ead..afbc1c5b 100644 --- a/modules/nf-core/cellranger/count/templates/cellranger_count.py +++ b/modules/nf-core/cellranger/count/templates/cellranger_count.py @@ -40,11 +40,13 @@ def chunk_iter(seq, size): # filename_pattern = r"([^a-zA-Z0-9])R1([^a-zA-Z0-9])" """ filename_pattern = r"(.+)_1" +print(filename_pattern) for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2)): # double escapes are required because nextflow processes this python 'template' # FIXME: Generalize this pattern switching function once we standardize the filename pattern # if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name: + print(re.sub(filename_pattern, r"\1_2", r1.name)) if re.sub(filename_pattern, r"\1_2", r1.name) != r2.name: raise AssertionError( dedent( From a492326ad6e6fb2acde61f9c3ce56ce0c3ddf1c3 Mon Sep 17 00:00:00 2001 From: Corey Hayford Date: Wed, 20 Dec 2023 15:53:55 +0000 Subject: [PATCH 3/6] Add print statement --- modules/nf-core/cellranger/count/templates/cellranger_count.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/nf-core/cellranger/count/templates/cellranger_count.py b/modules/nf-core/cellranger/count/templates/cellranger_count.py index afbc1c5b..37613ddf 100644 --- a/modules/nf-core/cellranger/count/templates/cellranger_count.py +++ b/modules/nf-core/cellranger/count/templates/cellranger_count.py @@ -46,6 +46,7 @@ def chunk_iter(seq, size): # double escapes are required because nextflow processes this python 'template' # FIXME: Generalize this pattern switching function once we standardize the filename pattern # if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name: + print(r1.name) print(re.sub(filename_pattern, r"\1_2", r1.name)) if re.sub(filename_pattern, r"\1_2", r1.name) != r2.name: raise AssertionError( From 62e6540c77b751f328af6f6f4fb9a47aa6ca7a83 Mon Sep 17 00:00:00 2001 From: Corey Hayford Date: Wed, 20 Dec 2023 17:53:39 +0000 Subject: [PATCH 4/6] Trying different file renaming approach --- .../cellranger/count/templates/cellranger_count.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/cellranger/count/templates/cellranger_count.py b/modules/nf-core/cellranger/count/templates/cellranger_count.py index 37613ddf..45eaf4ac 100644 --- a/modules/nf-core/cellranger/count/templates/cellranger_count.py +++ b/modules/nf-core/cellranger/count/templates/cellranger_count.py @@ -39,16 +39,21 @@ def chunk_iter(seq, size): # do not match "SRR12345", "file_INFIXR12", etc # filename_pattern = r"([^a-zA-Z0-9])R1([^a-zA-Z0-9])" """ +# FIXME: This is working locally but not on AWS Batch filename_pattern = r"(.+)_1" -print(filename_pattern) +# print(filename_pattern) for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2)): # double escapes are required because nextflow processes this python 'template' # FIXME: Generalize this pattern switching function once we standardize the filename pattern # if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name: + # FIXME: This is working locally but not on AWS Batch + # if re.sub(filename_pattern, r"\1_2", r1.name) != r2.name: print(r1.name) + print(r2.name) print(re.sub(filename_pattern, r"\1_2", r1.name)) - if re.sub(filename_pattern, r"\1_2", r1.name) != r2.name: + print(r1.name.replace("_1", "_2")) + if r1.name.replace("_1", "_2") != r2.name: raise AssertionError( dedent( f"""\ From dc436f24cf641a82775e3aa09ac32e5fcfe0708c Mon Sep 17 00:00:00 2001 From: Corey Hayford Date: Wed, 20 Dec 2023 21:56:00 +0000 Subject: [PATCH 5/6] Cleaning up print statements. Modified the regular expression check for R1/R2 FASTQs based on standard outputs from fetchNGS --- .../nf-core/cellranger/count/templates/cellranger_count.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/modules/nf-core/cellranger/count/templates/cellranger_count.py b/modules/nf-core/cellranger/count/templates/cellranger_count.py index 45eaf4ac..4d572af0 100644 --- a/modules/nf-core/cellranger/count/templates/cellranger_count.py +++ b/modules/nf-core/cellranger/count/templates/cellranger_count.py @@ -40,7 +40,7 @@ def chunk_iter(seq, size): # filename_pattern = r"([^a-zA-Z0-9])R1([^a-zA-Z0-9])" """ # FIXME: This is working locally but not on AWS Batch -filename_pattern = r"(.+)_1" +# filename_pattern = r"(.+)_1" # print(filename_pattern) for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2)): @@ -49,10 +49,6 @@ def chunk_iter(seq, size): # if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name: # FIXME: This is working locally but not on AWS Batch # if re.sub(filename_pattern, r"\1_2", r1.name) != r2.name: - print(r1.name) - print(r2.name) - print(re.sub(filename_pattern, r"\1_2", r1.name)) - print(r1.name.replace("_1", "_2")) if r1.name.replace("_1", "_2") != r2.name: raise AssertionError( dedent( From c6f418efd897ed198c0b28d087fa3621baac7746 Mon Sep 17 00:00:00 2001 From: corey-hayford Date: Tue, 13 Feb 2024 13:22:30 -0500 Subject: [PATCH 6/6] Changing default behavior of cellranger to exclude introns --- modules/nf-core/cellranger/count/templates/cellranger_count.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/nf-core/cellranger/count/templates/cellranger_count.py b/modules/nf-core/cellranger/count/templates/cellranger_count.py index 4d572af0..76878969 100644 --- a/modules/nf-core/cellranger/count/templates/cellranger_count.py +++ b/modules/nf-core/cellranger/count/templates/cellranger_count.py @@ -66,11 +66,13 @@ def chunk_iter(seq, size): r1.rename(fastq_all / f"{sample_id}_S1_L{i:03d}_R1_001.fastq.gz") r2.rename(fastq_all / f"{sample_id}_S1_L{i:03d}_R2_001.fastq.gz") +# FIXME: Add toggle for introns based on sc vs snRNA run( # fmt: off [ "cellranger", "count", "--id", "${prefix}", + '--include-introns', 'false', "--fastqs", str(fastq_all), "--transcriptome", "${reference.name}", "--localcores", "${task.cpus}",