Skip to content

Commit

Permalink
altos auto chemistry update
Browse files Browse the repository at this point in the history
  • Loading branch information
wzheng0520 committed Nov 4, 2024
1 parent 4171377 commit 263759a
Show file tree
Hide file tree
Showing 16 changed files with 2,168 additions and 16 deletions.
1,211 changes: 1,211 additions & 0 deletions CHANGELOG_ALTOS.md

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions assets/protocols.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"alevin": {
"10XV1": {
"protocol": "10xv1",
"protocol": "1{b[14]u[10]x:}2{r:}",
"whitelist": "assets/whitelist/10x_V1_barcode_whitelist.txt.gz"
},
"10XV2": {
Expand All @@ -13,7 +13,7 @@
"whitelist": "assets/whitelist/10x_V3_barcode_whitelist.txt.gz"
},
"10XV4": {
"protocol": "10xv4",
"protocol": "1{b[16]u[12]x:}2{r:}",
"whitelist": "assets/whitelist/10x_V4_barcode_whitelist.txt.gz"
},
"dropseq": {
Expand Down
10 changes: 10 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,18 @@ process {
enabled: false
]
}
// Publishing rules applied to the AUTO_DETECT_PROTOCOL process.
withName: AUTO_DETECT_PROTOCOL {
publishDir = [
// target directory, resolved lazily from params.outdir
path: { "${params.outdir}/pipeline_info" },
mode: params.publish_dir_mode,
// saveAs returns null for versions.yml (per the Nextflow publishDir docs a null result
// means "do not publish"); every other file keeps its original name
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
// publishing is switched off for this process, so the settings above only take
// effect if this flag is flipped to true
enabled: false
]
}
}



if(params.aligner == "cellranger") {
process {
withName: CELLRANGER_MKGTF {
Expand Down
95 changes: 95 additions & 0 deletions modules/local/auto_detect_protocol.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@

/*
 * AUTO_DETECT_PROTOCOL
 *
 * Guesses the single-cell chemistry of a sample by comparing the leading bases
 * of the first FastQ file against the barcode whitelist of every protocol that
 * `protocol_json` defines for the selected `aligner`.
 *
 * Inputs:
 *   meta, reads        sample metadata and FastQ files; reads[0] must hold the cell barcodes
 *   aligner            top-level key of protocol_json whose protocols are tested
 *   protocol_json      JSON mapping aligner -> protocol key -> {protocol, whitelist, extra_args}
 *   barcode_whitelist  whitelist files staged into the work dir; they are looked up by the
 *                      basename of the `whitelist` path given in protocol_json
 *
 * Outputs:
 *   ch_fastq    the unchanged (meta, reads) tuple
 *   protocol    env PROTOCOL   - `protocol` attribute of the detected chemistry
 *   extra_args  env EXTRA_ARGS - `extra_args` attribute of the detected chemistry
 *   whitelist   *.txt.gz       - the chosen whitelist, copied to whitelist.txt.gz
 *   versions    versions.yml
 *
 * The task exits with status 1 unless exactly one protocol reaches a matching
 * fraction of at least 0.7.
 */
process AUTO_DETECT_PROTOCOL {
    tag "$meta.id"
    label 'process_single'

    conda 'conda-forge::jq=1.6'
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/jq:1.6' :
        'biocontainers/jq:1.6' }"

    input:
    // the first FastQ file in `reads` is expected to contain the cell barcodes
    tuple val(meta), path(reads)
    val aligner
    path protocol_json
    path barcode_whitelist

    output:
    tuple val(meta), path(reads), emit: ch_fastq
    env PROTOCOL, emit: protocol
    env EXTRA_ARGS, emit: extra_args
    path "*.txt.gz", emit: whitelist
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    """
    # convert protocols.json to a tab-separated table: key, protocol, whitelist, extra_args
    TABLE=\$(
        jq -r '
            ."$aligner" |
            to_entries[] |
            "\\(.key)\\t\\(.value.protocol//"")\\t\\(.value.whitelist//"")\\t\\(.value.extra_args//"")"
        ' "${protocol_json}"
    )

    # print column \$2 of the TABLE row whose key equals \$1 exactly
    # (an exact comparison, so a key that is a prefix of another key cannot match both rows)
    lookup() {
        awk -F'\\t' -v key="\$1" -v col="\$2" '\$1 == key { print \$col }' <<<"\$TABLE"
    }

    # subsample the FastQ file once; the sample is the same for every protocol
    gzip -dcf "${reads[0]}" |
        awk 'FNR % 4 == 2' | # extract the read sequence from FastQ
        head -n 100000 > reads || true # the first 100k reads should suffice; head closing the pipe is expected

    # iterate over all protocols defined for the selected aligner
    MATCHING_FRACTIONS=\$(cut -f1 <<<"\$TABLE" | while IFS= read -r KEY; do

        # locate the staged whitelist of this protocol
        WHITELIST=\$(lookup "\$KEY" 3)
        [ -n "\$WHITELIST" ] || continue # skip protocols without whitelist
        WHITELIST_FILE=\$(basename "\$WHITELIST")
        if [ ! -e "\$WHITELIST_FILE" ]; then
            echo "WARNING: whitelist \$WHITELIST_FILE of protocol \$KEY is not staged; skipping." >&2
            continue
        fi

        # uncompress whitelist
        gzip -dcf "\$WHITELIST_FILE" > barcodes

        # extract the barcodes from the FastQ reads and count how many are valid barcodes
        awk -v KEY="\$KEY" -v OFS='\\t' '
            { \$0 = substr(\$0, 1, 14) } # the barcode is in the first 14 bases; 10X V2/3 barcodes are trimmed
            FILENAME == "barcodes" { barcodes[\$0] } # cache barcodes in memory
            FILENAME == "reads" { total++; if (\$0 in barcodes) count++ } # count reads and matches for each chemistry
            END { print KEY, (total > 0 ? count / total : 0) } # fraction of matching barcodes; 0 if there are no reads
        ' barcodes reads

    done | sort -k2,2gr)

    # only trust the auto-detection if exactly one protocol matches
    echo -e "These were the fractions of matching barcodes by protocol:\\n\$MATCHING_FRACTIONS"
    MATCHING_PROTOCOLS_COUNT=\$(awk '\$2 >= 0.7 { n++ } END { print n + 0 }' <<<"\$MATCHING_FRACTIONS")
    if [ "\$MATCHING_PROTOCOLS_COUNT" -ne 1 ]; then
        echo "ERROR: Found \$MATCHING_PROTOCOLS_COUNT matching protocols." >&2
        exit 1
    fi
    # fractions are sorted in descending order, so the first row is the matching protocol
    KEY=\$(head -n1 <<<"\$MATCHING_FRACTIONS" | cut -f1)

    # extract attributes of chosen protocol
    PROTOCOL=\$(lookup "\$KEY" 2)
    WHITELIST=\$(basename "\$(lookup "\$KEY" 3)")
    EXTRA_ARGS=\$(lookup "\$KEY" 4)

    # Remove all other whitelist files
    for file in "\$PWD"/*.txt.gz; do
        FILE_NAME=\$(basename "\$file")
        if [ "\$FILE_NAME" != "\$WHITELIST" ]; then
            rm -f -- "\$FILE_NAME"
        fi
    done

    # Copy the chosen whitelist file
    cp "\$WHITELIST" "whitelist.txt.gz"
    echo \$PWD/*.txt.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        jq: \$(jq --version | cut -d- -f2)
    END_VERSIONS
    """
}
1 change: 1 addition & 0 deletions modules/local/simpleaf_index.nf
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ process SIMPLEAF_INDEX {
simpleaf set-paths
# run simpleaf index
simpleaf \\
index \\
--threads $task.cpus \\
Expand Down
7 changes: 7 additions & 0 deletions modules/nf-core/cat/fastq/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

84 changes: 84 additions & 0 deletions modules/nf-core/cat/fastq/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions modules/nf-core/cat/fastq/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions modules/nf-core/cat/fastq/nextflow.config

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 263759a

Please sign in to comment.