diff --git a/salmon.txt b/salmon.txt new file mode 100644 index 00000000..886461f1 --- /dev/null +++ b/salmon.txt @@ -0,0 +1,579 @@ + +Quant +========== +Perform dual-phase, selective-alignment-based estimation of +transcript abundance from RNA-seq reads + +salmon quant options: + + +mapping input options: + -l [ --libType ] arg Format string describing the library + type + -i [ --index ] arg salmon index + -r [ --unmatedReads ] arg List of files containing unmated reads + (e.g. single-end reads) + -1 [ --mates1 ] arg File containing the #1 mates + -2 [ --mates2 ] arg File containing the #2 mates + + +basic options: + -v [ --version ] print version string + -h [ --help ] produce help message + -o [ --output ] arg Output quantification directory. + --seqBias Perform sequence-specific bias + correction. + --gcBias [beta for single-end reads] Perform + fragment GC bias correction. + --posBias Perform positional bias correction. + -p [ --threads ] arg (=16) The number of threads to use + concurrently. + --incompatPrior arg (=0) This option sets the prior probability + that an alignment that disagrees with + the specified library type (--libType) + results from the true fragment origin. + Setting this to 0 specifies that + alignments that disagree with the + library type should be "impossible", + while setting it to 1 says that + alignments that disagree with the + library type are no less likely than + those that do + -g [ --geneMap ] arg File containing a mapping of + transcripts to genes. If this file is + provided salmon will output both + quant.sf and quant.genes.sf files, + where the latter contains aggregated + gene-level abundance estimates. The + transcript to gene mapping should be + provided as either a GTF file, or in + a simple tab-delimited format where + each line contains the name of a + transcript and the gene to which it + belongs separated by a tab. The + extension of the file is used to + determine how the file should be + parsed. 
Files ending in '.gtf', '.gff' + or '.gff3' are assumed to be in GTF + format; files with any other extension + are assumed to be in the simple format. + In GTF / GFF format, the + "transcript_id" is assumed to contain + the transcript identifier and the + "gene_id" is assumed to contain the + corresponding gene identifier. + --auxTargetFile arg A file containing a list of "auxiliary" + targets. These are valid targets + (i.e., not decoys) to which fragments + are allowed to map and be assigned, and + which will be quantified, but for which + auxiliary models like sequence-specific + and fragment-GC bias correction should + not be applied. + --meta If you're using Salmon on a metagenomic + dataset, consider setting this flag to + disable parts of the abundance + estimation model that make less sense + for metagenomic data. + + +options specific to mapping mode: + --discardOrphansQuasi [selective-alignment mode only] : + Discard orphan mappings in + selective-alignment mode. If this flag + is passed then only paired mappings + will be considered toward + quantification estimates. The default + behavior is to consider orphan mappings + if no valid paired mappings exist. + This flag is independent of the option + to write the orphaned mappings to file + (--writeOrphanLinks). + --validateMappings [*deprecated* (no effect; + selective-alignment is the default)] + --consensusSlack arg (=0.349999994) [selective-alignment mode only] : The + amount of slack allowed in the + selective-alignment filtering + mechanism. If this is set to a + fraction, X, greater than 0 (and in + [0,1)), then uniMEM chains with scores + below (100 * X)% of the best chain + score for a read, and read pairs with a + sum of chain scores below (100 * X)% of + the best chain score for a read pair + will be discounted as mapping + candidates. The default value of this + option is 0.35. 
+ --preMergeChainSubThresh arg (=0.75) [selective-alignment mode only] : The + threshold of sub-optimal chains, + compared to the best chain on a given + target, that will be retained and + passed to the next phase of mapping. + Specifically, if the best chain for a + read (or read-end in paired-end mode) + to target t has score X_t, then all + chains for this read with score >= X_t + * preMergeChainSubThresh will be + retained and passed to subsequent + mapping phases. This value must be in + the range [0, 1]. + --postMergeChainSubThresh arg (=0.90000000000000002) + [selective-alignment mode only] : The + threshold of sub-optimal chain pairs, + compared to the best chain pair on a + given target, that will be retained and + passed to the next phase of mapping. + This is different than + preMergeChainSubThresh, because this is + applied to pairs of chains (from the + ends of paired-end reads) after merging + (i.e. after checking concordancy + constraints etc.). Specifically, if + the best chain pair to target t has + score X_t, then all chain pairs for + this read pair with score >= X_t * + postMergeChainSubThresh will be + retained and passed to subsequent + mapping phases. This value must be in + the range [0, 1]. Note: This option is + only meaningful for paired-end + libraries, and is ignored for + single-end libraries. + --orphanChainSubThresh arg (=0.94999999999999996) + [selective-alignment mode only] : This + threshold sets a global sub-optimality + threshold for chains corresponding to + orphan mappings. That is, if the + merging procedure results in no + concordant mappings then only orphan + mappings with a chain score >= + orphanChainSubThresh * bestChainScore + will be retained and passed to + subsequent mapping phases. This value + must be in the range [0, 1]. Note: This + option is only meaningful for + paired-end libraries, and is ignored + for single-end libraries. 
+ --scoreExp arg (=1) [selective-alignment mode only] : The + factor by which sub-optimal alignment + scores are downweighted to produce a + probability. If the best alignment + score for the current read is S, and + the score for a particular alignment is + w, then the probability will be + computed proportional to exp( - + scoreExp * (S-w) ). + --minScoreFraction arg [selective-alignment mode only] : The + fraction of the optimal possible + alignment score that a mapping must + achieve in order to be considered + "valid" --- should be in (0,1]. + Salmon Default 0.65 and Alevin Default + 0.87 + --mismatchSeedSkip arg (=3) [selective-alignment mode only] : After + a k-mer hit is extended to a uni-MEM, + the uni-MEM extension can terminate for + one of 3 reasons; the end of the read, + the end of the unitig, or a mismatch. + If the extension ends because of a + mismatch, this is likely the result of + a sequencing error. To avoid looking + up many k-mers that will likely fail to + be located in the index, the search + procedure skips by a factor of + mismatchSeedSkip until it either (1) + finds another match or (2) is k-bases + past the mismatch position. This value + controls that skip length. A smaller + value can increase sensitivity, while a + larger value can speed up seeding. + --disableChainingHeuristic [selective-alignment mode only] : By + default, the heuristic of (Li 2018) is + implemented, which terminates the + chaining DP once a given number of + valid backpointers are found. This + speeds up the seed (MEM) chaining step, + but may result in sub-optimal chains in + complex situations (e.g. sequences with + many repeats and overlapping repeats). + Passing this flag will disable the + chaining heuristic, and perform the + full chaining dynamic program, + guaranteeing the optimal chain is found + in this step. 
+ --decoyThreshold arg (=1) [selective-alignment mode only] : For + an alignment to an annotated transcript + to be considered invalid, it must have + an alignment score < (decoyThreshold * + bestDecoyScore). A value of 1.0 means + that any alignment strictly worse than + the best decoy alignment will be + discarded. A smaller value will allow + reads to be allocated to transcripts + even if they strictly align better to + the decoy sequence. + --ma arg (=2) [selective-alignment mode only] : The + value given to a match between read and + reference nucleotides in an alignment. + --mp arg (=-4) [selective-alignment mode only] : The + value given to a mis-match between read + and reference nucleotides in an + alignment. + --go arg (=6) [selective-alignment mode only] : The + value given to a gap opening in an + alignment. + --ge arg (=2) [selective-alignment mode only] : The + value given to a gap extension in an + alignment. + --bandwidth arg (=15) [selective-alignment mode only] : The + value used for the bandwidth passed to + ksw2. A smaller bandwidth can make the + alignment verification run more + quickly, but could possibly miss valid + alignments. + --allowDovetail [selective-alignment mode only] : allow + dovetailing mappings. + --recoverOrphans [selective-alignment mode only] : + Attempt to recover the mates of + orphaned reads. This uses edlib for + orphan recovery, and so introduces some + computational overhead, but it can + improve sensitivity. + --mimicBT2 [selective-alignment mode only] : Set + flags to mimic parameters similar to + Bowtie2 with --no-discordant and + --no-mixed flags. This + disallows dovetailing reads, and + discards orphans. Note, this does not + impose the very strict parameters + assumed by RSEM+Bowtie2, like gapless + alignments. For that behavior, use the + --mimicStrictBT2 flag below. + --mimicStrictBT2 [selective-alignment mode only] : Set + flags to mimic the very strict + parameters used by RSEM+Bowtie2. 
This + increases --minScoreFraction to 0.8, + disallows dovetailing reads, discards + orphans, and disallows gaps in + alignments. + --softclip [selective-alignment mode only + (experimental)] : Allow soft-clipping + of reads during selective-alignment. If + this option is provided, then regions + at the beginning or end of the read can + be withheld from alignment without any + effect on the resulting score (i.e. + neither adding nor removing from the + score). This will drastically reduce + the penalty if there are mismatches at + the beginning or end of the read due to + e.g. low-quality bases or adapters. + NOTE: Even with soft-clipping enabled, + the read must still achieve a score of + at least minScoreFraction * maximum + achievable score, where the maximum + achievable score is computed based on + the full (un-clipped) read length. + --softclipOverhangs [selective-alignment mode only] : Allow + soft-clipping of reads that overhang + the beginning or ends of the + transcript. In this case, the + overhanging section of the read will + simply be unaligned, and will not + contribute or detract from the + alignment score. The default policy is + to force an end-to-end alignment of the + entire read, so that overhanings will + result in some deletion of nucleotides + from the read. + --fullLengthAlignment [selective-alignment mode only] : + Perform selective alignment over the + full length of the read, beginning from + the (approximate) initial mapping + location and using extension alignment. + This is in contrast with the default + behavior which is to only perform + alignment between the MEMs in the + optimal chain (and before the first and + after the last MEM if applicable). The + default strategy forces the MEMs to + belong to the alignment, but has the + benefit that it can discover indels + prior to the first hit shared between + the read and reference. Except in very + rare circumstances, the default mode + should be more accurate. 
+ --hardFilter [selective-alignment mode only] : + Instead of weighting mappings by their + alignment score, this flag will discard + any mappings with sub-optimal alignment + score. The default option of + soft-filtering (i.e. weighting mappings + by their alignment score) usually + yields slightly more accurate abundance + estimates but this flag may be + desirable if you want more accurate + 'naive' equivalence classes, rather + than range factorized equivalence + classes. + --minAlnProb arg (=1.0000000000000001e-05) + [selective-alignment mode only] : Any + mapping whose alignment probability (as + computed by P(aln) = exp(-scoreExp * + difference from best mapping score)) is + less than minAlnProb will not be + considered as a valid alignment for + this read. The goal of this flag is to + remove very low probability alignments + that are unlikely to have any + non-trivial effect on the final + quantifications. Filtering such + alignments reduces the number of + variables that need to be considered + and can result in slightly faster + inference and 'cleaner' equivalence + classes. + -z [ --writeMappings ] [=arg(=-)] If this option is provided, then the + selective-alignment results will be + written out in SAM-compatible format. + By default, output will be directed to + stdout, but an alternative file name + can be provided instead. + --writeQualities This flag only has meaning if mappings + are being written (with + --writeMappings/-z). If this flag is + provided, then the output SAM file will + contain quality strings as well as read + sequences. Note that this can greatly + increase the size of the output file. + --hitFilterPolicy arg (=AFTER) [selective-alignment mode only] : + Determines the policy by which hits are + filtered in selective alignment. + Filtering hits after chaining (the + default) is more sensitive, but more + computationally intensive, because it + performs the chaining dynamic program + for all hits. 
Filtering before + chaining is faster, but some true hits + may be missed. The options are BEFORE, + AFTER, BOTH and NONE. + + +advanced options: + --alternativeInitMode [Experimental]: Use an alternative + strategy (rather than simple + interpolation between) the online and + uniform abundance estimates to + initialize the EM / VBEM algorithm. + --auxDir arg (=aux_info) The sub-directory of the quantification + directory where auxiliary information + e.g. bootstraps, bias parameters, etc. + will be written. + --skipQuant Skip performing the actual transcript + quantification (including any Gibbs + sampling or bootstrapping). + --dumpEq Dump the simple equivalence class + counts that were computed during + mapping or alignment. + -d [ --dumpEqWeights ] Dump conditional probabilities + associated with transcripts when + equivalence class information is being + dumped to file. Note, this will dump + the factorization that is actually used + by salmon's offline phase for + inference. If you are using + range-factorized equivalence classes + (the default) then the same transcript + set may appear multiple times with + different associated conditional + probabilities. + --minAssignedFrags arg (=10) The minimum number of fragments that + must be assigned to the transcriptome + for quantification to proceed. + --reduceGCMemory If this option is selected, a more + memory efficient (but slightly slower) + representation is used to compute + fragment GC content. Enabling this will + reduce memory usage, but can also + reduce speed. However, the results + themselves will remain the same. + --biasSpeedSamp arg (=5) The value at which the fragment length + PMF is down-sampled when evaluating + sequence-specific & GC fragment bias. + Larger values speed up effective length + correction, but may decrease the + fidelity of bias modeling results. 
+ --fldMax arg (=1000) The maximum fragment length to consider + when building the empirical + distribution + --fldMean arg (=250) The mean used in the fragment length + distribution prior + --fldSD arg (=25) The standard deviation used in the + fragment length distribution prior + -f [ --forgettingFactor ] arg (=0.65000000000000002) + The forgetting factor used in the + online learning schedule. A smaller + value results in quicker learning, but + higher variance and may be unstable. A + larger value results in slower learning + but may be more stable. Value should + be in the interval (0.5, 1.0]. + --initUniform initialize the offline inference with + uniform parameters, rather than seeding + with online parameters. + --maxOccsPerHit arg (=1000) When collecting "hits" (MEMs), hits + having more than maxOccsPerHit + occurrences won't be considered. + -w [ --maxReadOcc ] arg (=200) Reads "mapping" to more than this many + places won't be considered. + --maxRecoverReadOcc arg (=2500) Relevant for alevin with '--sketch' + mode only: if a read has valid seed + matches, but no read has matches + leading to fewer than "maxReadOcc" + mappings, then try to recover mappings + for this read as long as there are + fewer than "maxRecoverReadOcc" + mappings. + --noLengthCorrection [experimental] : Entirely disables + length correction when estimating the + abundance of transcripts. This option + can be used with protocols where one + expects that fragments derive from + their underlying targets without regard + to that target's length (e.g. QuantSeq) + --noEffectiveLengthCorrection Disables effective length correction + when computing the probability that a + fragment was generated from a + transcript. If this flag is passed in, + the fragment length distribution is not + taken into account when computing this + probability. 
+ --noSingleFragProb Disables the estimation of an + associated fragment length probability + for single-end reads or for orphaned + mappings in paired-end libraries. The + default behavior is to consider the + probability of all possible fragment + lengths associated with the retained + mapping. Enabling this flag (i.e. + turning this default behavior off) will + simply not attempt to estimate a + fragment length probability in such + cases. + --noFragLengthDist [experimental] : Don't consider + concordance with the learned fragment + length distribution when trying to + determine the probability that a + fragment has originated from a + specified location. Normally, + Fragments with unlikely lengths will be + assigned a smaller relative probability + than those with more likely lengths. + When this flag is passed in, the + observed fragment length has no effect + on that fragment's a priori + probability. + --noBiasLengthThreshold [experimental] : If this option is + enabled, then no (lower) threshold will + be set on how short bias correction can + make effective lengths. This can + increase the precision of bias + correction, but harm robustness. The + default correction applies a threshold. + --numBiasSamples arg (=2000000) Number of fragment mappings to use when + learning the sequence-specific bias + model. + --numAuxModelSamples arg (=5000000) The first numAuxModelSamples observations are used + to train the auxiliary model parameters + (e.g. fragment length distribution, + bias, etc.). After the first numAuxModelSamples + observations the + auxiliary model parameters will be + assumed to have converged and will be + fixed. + --numPreAuxModelSamples arg (=5000) The first numPreAuxModelSamples observations will + have their assignment likelihoods and + contributions to the transcript + abundances computed without applying + any auxiliary models. The purpose of + ignoring the auxiliary models for the + first numPreAuxModelSamples + observations is to avoid applying these + models before their parameters have + been learned sufficiently well. 
+ --useEM Use the traditional EM algorithm for + optimization in the batch passes. + --useVBOpt Use the Variational Bayesian EM + [default] + --rangeFactorizationBins arg (=4) Factorizes the likelihood used in + quantification by adopting a new notion + of equivalence classes based on the + conditional probabilities with which + fragments are generated from different + transcripts. This is a more + fine-grained factorization than the + normal rich equivalence classes. The + default value (4) corresponds to the + default used in Zakeri et al. 2017 + (doi: 10.1093/bioinformatics/btx262), + and larger values imply a more + fine-grained factorization. If range + factorization is enabled, a common + value to select for this parameter is + 4. A value of 0 signifies the use of + basic rich equivalence classes. + --numGibbsSamples arg (=0) Number of Gibbs sampling rounds to + perform. + --noGammaDraw This switch will disable drawing + transcript fractions from a Gamma + distribution during Gibbs sampling. In + this case the sampler does not account + for shot-noise, but only assignment + ambiguity + --numBootstraps arg (=0) Number of bootstrap samples to + generate. Note: This is mutually + exclusive with Gibbs sampling. + --bootstrapReproject This switch will learn the parameter + distribution from the bootstrapped + counts for each sample, but will + reproject those parameters onto the + original equivalence class counts. + --thinningFactor arg (=16) Number of steps to discard for every + sample kept from the Gibbs chain. The + larger this number, the less chance + that subsequent samples are + auto-correlated, but the slower + sampling becomes. + -q [ --quiet ] Be quiet while doing quantification + (don't write informative output to the + console unless something goes wrong). + --perTranscriptPrior The prior (either the default or the + argument provided via --vbPrior) will + be interpreted as a transcript-level + prior (i.e. 
each transcript will be + given a prior read count of this value) + --perNucleotidePrior The prior (either the default or the + argument provided via --vbPrior) will + be interpreted as a nucleotide-level + prior (i.e. each nucleotide will be + given a prior read count of this value) + --sigDigits arg (=3) The number of significant digits to + write when outputting the + EffectiveLength and NumReads columns + --vbPrior arg (=0.01) The prior that will be used in the VBEM + algorithm. This is interpreted as a + per-transcript prior, unless the + --perNucleotidePrior flag is also + given. If the --perNucleotidePrior + flag is given, this is used as a + nucleotide-level prior. If the default + is used, it will be divided by 1000 + before being used as a nucleotide-level + prior, i.e. the default per-nucleotide + prior will be 1e-5. + --writeOrphanLinks Write the transcripts that are linked + by orphaned reads. + --writeUnmappedNames Write the names of un-mapped reads to + the file unmapped_names.txt in the + auxiliary directory. + diff --git a/seqnado/config.py b/seqnado/config.py index 35bf07ae..1d49f165 100644 --- a/seqnado/config.py +++ b/seqnado/config.py @@ -86,7 +86,7 @@ def setup_configuration(assay, genome, template_data): "Path to fastqscreen config:", default="/ceph/project/milne_group/shared/seqnado_reference/fastqscreen_reference/fastq_screen.conf", ) - + # Blacklist template_data["remove_blacklist"] = get_user_input( "Do you want to remove blacklist regions? (yes/no)", @@ -108,8 +108,8 @@ def setup_configuration(assay, genome, template_data): ) # Library Complexity template_data["library_complexity"] = get_user_input( - "Calculate library complexity? (yes/no)", default="no", is_boolean=True - ) + "Calculate library complexity? 
(yes/no)", default="no", is_boolean=True + ) else: template_data["remove_pcr_duplicates_method"] = "False" template_data["library_complexity"] = "False" @@ -169,6 +169,26 @@ def setup_configuration(assay, genome, template_data): choices=["lanceotron", "macs", "homer"], ) + # RNA options + template_data["rna_quantification"] = ( + get_user_input( + "RNA quantification method:", + default="feature_counts", + choices=["feature_counts", "salmon"], + ) + if assay == "rna" + else "False" + ) + + template_data["salmon_index"] = ( + get_user_input( + "Path to salmon index:", + default="/ceph/project/milne_group/shared/salmon_ref/hg38/cDNA/Homo_sapiens.GRCh38.cdna.all.fa.gz", + ) + if template_data["rna_quantification"] == "salmon" + else "False" + ) + # Run DESeq2 template_data["run_deseq2"] = ( get_user_input("Run DESeq2? (yes/no)", default="no", is_boolean=True) @@ -256,6 +276,10 @@ def setup_configuration(assay, genome, template_data): threads: 16 options: -s 0 -p --countReadPairs -t exon -g gene_id +salmon: + threads: 16 + options: --libType A + homer: maketagdirectory: makebigwig: diff --git a/seqnado/design.py b/seqnado/design.py index 8b54865c..605fc7df 100644 --- a/seqnado/design.py +++ b/seqnado/design.py @@ -958,10 +958,16 @@ class RNAOutput(Output): assay: Literal["RNA"] project_name: str run_deseq2: bool = False + rna_quantification: Optional[Literal["feature_counts", "salmon"]] = None @property def counts(self): - return ["seqnado_output/feature_counts/read_counts.tsv"] + """Return the expected count file for the selected quantification method.""" + if self.rna_quantification == "salmon": + return ["seqnado_output/quantification/salmon/quant.sf"] + # feature_counts is also the fallback when rna_quantification is None (the + # field default), so this property always returns a list and never None. + return ["seqnado_output/quantification/feature_counts/read_counts.tsv"] @property def deseq2(self): diff --git a/seqnado/workflow/config/config.yaml.jinja b/seqnado/workflow/config/config.yaml.jinja index ae041425..50b596e2 100755 --- a/seqnado/workflow/config/config.yaml.jinja +++ b/seqnado/workflow/config/config.yaml.jinja @@ -39,6 +39,9 @@ 
make_heatmaps: "{{make_heatmaps}}" call_peaks: "{{call_peaks}}" peak_calling_method: "{{peak_calling_method}}" +rna_quantification: "{{rna_quantification}}" +salmon_index: "{{salmon_index}}" + run_deseq2: "{{run_deseq2}}" diff --git a/seqnado/workflow/rules/alignment_counts.smk b/seqnado/workflow/rules/alignment_counts.smk index cdc9b314..d21db4cd 100644 --- a/seqnado/workflow/rules/alignment_counts.smk +++ b/seqnado/workflow/rules/alignment_counts.smk @@ -1,14 +1,12 @@ from seqnado.helpers import check_options - - rule feature_counts: input: bam=expand("seqnado_output/aligned/{sample}.bam", sample=SAMPLE_NAMES), bai=expand("seqnado_output/aligned/{sample}.bam.bai", sample=SAMPLE_NAMES), annotation=config["genome"]["gtf"], output: - counts="seqnado_output/feature_counts/read_counts.tsv", + counts="seqnado_output/quantification/feature_counts/read_counts.tsv", params: options=check_options(config["featurecounts"]["options"]), threads: config["featurecounts"]["threads"] @@ -16,7 +14,7 @@ rule feature_counts: mem=lambda wildcards, attempt: f"{3 * 2 ** (attempt)}GB", runtime="2h", log: - "seqnado_output/logs/readcounts/featurecounts/featurecounts.log", + "seqnado_output/logs/quantification/featurecounts/featurecounts.log", shell: """ featureCounts \ @@ -30,3 +28,48 @@ rule feature_counts: {input.bam} \ > {log} 2>&1 """ + +rule salmon_counts_paired: + # Quantify paired-end reads with salmon in mapping mode. The output has no + # {sample} wildcard, so the FASTQ files of all samples are passed together. + input: + fq1=expand("seqnado_output/fastq/{sample}_1.fastq.gz", sample=SAMPLE_NAMES), + fq2=expand("seqnado_output/fastq/{sample}_2.fastq.gz", sample=SAMPLE_NAMES), + output: + counts="seqnado_output/quantification/salmon/quant.sf", + params: + index=config["salmon_index"], + options=check_options(config["salmon"]["options"]), + threads: config["salmon"]["threads"] + resources: + mem=lambda wildcards, attempt: f"{3 * 2 ** (attempt)}GB", + runtime="2h", + log: + "seqnado_output/logs/readcounts/salmon/salmon.log", + shell: + """ + salmon quant -i {params.index} {params.options} -1 {input.fq1} -2 {input.fq2} -p {threads} -o 
seqnado_output/quantification/salmon > {log} 2>&1 + """ + +rule salmon_counts_single: + # Quantify single-end reads with salmon in mapping mode. The output has no + # {sample} wildcard, so the FASTQ files of all samples are passed together. + input: + fq=expand("seqnado_output/fastq/{sample}.fastq.gz", sample=SAMPLE_NAMES), + output: + counts="seqnado_output/quantification/salmon/quant.sf", + params: + index=config["salmon_index"], + options=check_options(config["salmon"]["options"]), + threads: config["salmon"]["threads"] + resources: + mem=lambda wildcards, attempt: f"{3 * 2 ** (attempt)}GB", + runtime="2h", + log: + "seqnado_output/logs/readcounts/salmon/salmon.log", + shell: + """ + salmon quant -i {params.index} {params.options} -r {input.fq} -p {threads} -o seqnado_output/quantification/salmon > {log} 2>&1 + """ + +ruleorder: feature_counts > salmon_counts_paired > salmon_counts_single diff --git a/seqnado/workflow/rules/transcript_counts.smk b/seqnado/workflow/rules/transcript_counts.smk deleted file mode 100644 index c0fa880d..00000000 --- a/seqnado/workflow/rules/transcript_counts.smk +++ /dev/null @@ -1,21 +0,0 @@ -from seqnado.helpers import check_options - - -rule salmon_counts_single: - input: - fq="seqnado_output/fastq/{sample}.fastq.gz", sample=SAMPLE_NAMES, - output: - counts="seqnado_output/salmon_counts/quant.sf", - params: - index=config["salmon"]["index"], - options=check_options(config["salmon"]["options"]), - threads: config["salmon"]["threads"] - resources: - mem=lambda wildcards, attempt: f"{3 * 2 ** (attempt)}GB", - runtime="2h", - log: - "seqnado_output/logs/readcounts/salmon/salmon.log", - shell: - """ - salmon quant -i {params.index} -l A -r {input.fq} -p {threads} -o seqnado_output/salmon_counts - """