Skip to content

Commit

Permalink
Initial development
Browse files Browse the repository at this point in the history
  • Loading branch information
adamjtaylor committed Oct 10, 2023
1 parent d9c2ba4 commit 73ecfd8
Show file tree
Hide file tree
Showing 14 changed files with 1,527 additions and 1 deletion.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
.DS_Store
.nextflow*
work/
outputs/
data/
test_samplesheet.csv
44 changes: 43 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,44 @@
# nf-vectra-to-htan
A NextFlow workflow to prepare Vectra mIF qptiff files for the HTAN DCC

A NextFlow workflow to prepare Vectra mIF qptiff files for the HTAN DCC.

This workflow takes qptiff files from Vectra mIF images and outputs OME-TIFF
images suitable for submission to the HTAN DCC with the Imaging Level 2 template.

It performs the following steps:

- Converts the first series in the qptiff (the full resolution image) to OME-TIFF via `bioformats2raw` and `raw2ometiff`. Other images included in the qptiff (the `thumbnail`, `overview` and `label`) are discarded.
- Removes `AcquisitionDate` and `StructuredAnnotations` from the OME-XML, and clears experimenter names and email addresses.
- Removes `DateTime` from the TIFF tags.

It outputs a tiled, pyramidal, single scene OME-TIFF file.

### Requirements

- [NextFlow](https://nextflow.io/)
- [Docker](https://docs.docker.com/engine/install/)

### Usage

```
nextflow run ncihtan/nf-vectra-to-htan --input <path-to-samplesheet>
```

### Inputs

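Create a CSV samplesheet with a column called `image`. An optional `id` column sets each sample's ID; if it is missing, the ID is derived from the image file name. For example: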

```
image
path/to/myimage.qptiff
s3://mybucket/myimage.qptiff
```

### Outputs

By default this outputs into a new directory called `outputs` in your current working directory.

### Parameters

- `outdir`: Directory for outputs (default: "`outputs`")
- `suffix`: Suffix for output files (default: "`_htan`")
38 changes: 38 additions & 0 deletions bin/clean_ometiff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env python

import sys
import tifftools
import ome_types
import os

# Path of the TIFF file to process, taken from the first command-line argument.
# NOTE(review): the name shadows the built-in `input`; it is kept because later
# statements in this script read it by this name.
input = sys.argv[1]


def split_all_ext(filename):
    """Split *filename* into its stem and the full run of trailing extensions.

    ``os.path.splitext`` only removes the last extension; this peels
    extensions off repeatedly, so ``"image.ome.tiff"`` becomes
    ``("image", ".ome.tiff")``.

    Args:
        filename: File name or path to split.

    Returns:
        A ``(basename, extensions)`` tuple. ``extensions`` is ``""`` when the
        name has no extension, and ``basename + extensions == filename``.
    """
    basename = filename
    extensions = []
    while True:
        basename, ext = os.path.splitext(basename)
        # Stop once nothing more can be stripped. Looping on
        # `"." in basename` never terminates for dots that splitext will not
        # remove: a leading "./", a dotted directory name, or a hidden file
        # such as ".env".
        if not ext:
            break
        extensions.append(ext)
    return basename, "".join(reversed(extensions))


# Print the "<name>_cleaned<ext>" form of the input name on stdout. The
# tiff_set call at the end passes no output path and overwrite=True.
insert_string = "_cleaned"
basename, all_ext = split_all_ext(input)
new_filename = basename + insert_string + all_ext
print(new_filename)

# Load the OME metadata and blank the fields this script strips: structured
# annotations, experimenter contact details and image acquisition dates.
ome = ome_types.from_tiff(input)
ome.structured_annotations.clear()
for experimenter in ome.experimenters:
    experimenter.email = None
    experimenter.first_name = None
    experimenter.last_name = None
for image in ome.images:
    image.acquisition_date = None

# Store the edited OME-XML in the ImageDescription tag.
cleaned_xml = ome_types.to_xml(ome)
set_list = [(tifftools.Tag.IMAGEDESCRIPTION, cleaned_xml)]
tifftools.tiff_set(input, overwrite=True, setlist=set_list)
41 changes: 41 additions & 0 deletions bin/clean_tiff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env python

import argparse
import tifftools
import os

import argparse

# Command-line interface: one positional input path plus an optional suffix
# that is inserted between the file's base name and its extensions.
parser = argparse.ArgumentParser(
    description="Write a copy of a TIFF file with the DateTime tag removed."
)

parser.add_argument("input", help="path of the TIFF file to clean")
parser.add_argument(
    "--suffix",
    default="_cleaned",
    help="text inserted before the file extension(s) of the output name "
    "(default: %(default)s)",
)

args = parser.parse_args()


def split_all_ext(filename):
    """Split *filename* into its stem and the full run of trailing extensions.

    ``os.path.splitext`` only removes the last extension; this peels
    extensions off repeatedly, so ``"image.ome.tiff"`` becomes
    ``("image", ".ome.tiff")``.

    Args:
        filename: File name or path to split.

    Returns:
        A ``(basename, extensions)`` tuple. ``extensions`` is ``""`` when the
        name has no extension, and ``basename + extensions == filename``.
    """
    basename = filename
    extensions = []
    while True:
        basename, ext = os.path.splitext(basename)
        # Stop once nothing more can be stripped. Looping on
        # `"." in basename` never terminates for dots that splitext will not
        # remove: a leading "./", a dotted directory name, or a hidden file
        # such as ".env".
        if not ext:
            break
        extensions.append(ext)
    return basename, "".join(reversed(extensions))


# Build the output name by inserting the suffix ahead of the extensions, and
# echo it on stdout.
basename, all_ext = split_all_ext(args.input)
new_filename = "".join((basename, args.suffix, all_ext))
print(new_filename)

# TIFF tags to remove from the copy.
unset_list = ["DateTime"]

# The result goes to `new_filename`; overwrite is left disabled.
tifftools.tiff_set(
    args.input, output=new_filename, overwrite=False, unset=unset_list
)
1,205 changes: 1,205 additions & 0 deletions dump.txt

Large diffs are not rendered by default.

43 changes: 43 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env nextflow
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
ncihtan/nf-vectra-to-htan
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Github : https://github.com/ncihtan/nf-vectra-to-htan
----------------------------------------------------------------------------------------
*/

nextflow.enable.dsl = 2

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
WORKFLOW PARAMETER VALUES
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// Default values for the workflow parameters.
params.outdir = 'outputs' // Directory for outputs
params.suffix = '_htan' // Suffix for processed files

// The samplesheet is mandatory: stop with exit status 1 when `input` is unset.
if (params.input) {
// Resolve the supplied samplesheet location with file().
// NOTE(review): confirm this reassignment takes effect when `input` is
// supplied on the command line.
params.input = file(params.input)
} else {
exit 1, 'Input samplesheet not specified!'
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NAMED WORKFLOWS FOR PIPELINE
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { VECTRA2HTAN } from './workflows/vectra2htan.nf'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RUN ALL WORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// Unnamed workflow: invokes VECTRA2HTAN, which is included from
// ./workflows/vectra2htan.nf.
workflow {
VECTRA2HTAN ()
}
15 changes: 15 additions & 0 deletions modules/clean_ome.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/*
 * Runs clean_ometiff.py on an OME-TIFF and emits the same file path again
 * (the script is called with only the image path, no output name).
 */
process clean_ome {
    // Process directives take their value as an argument, without `=`;
    // the `name = value` form is configuration-file syntax.
    container 'ghcr.io/ncihtan/nf-imagecleaner'

    input:
    tuple val(meta), file(image)

    output:
    tuple val(meta), file(image)

    stub:
    """
    touch image_cleaned.ome.tiff
    """

    script:
    """
    clean_ometiff.py $image
    """
}
15 changes: 15 additions & 0 deletions modules/clean_tiff.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/*
 * Runs clean_tiff.py on an OME-TIFF, emits the resulting `*.ome.tiff` file
 * and copies it into params.outdir.
 */
process clean_tiff {
    container 'ghcr.io/ncihtan/nf-imagecleaner'
    // Directives belong ahead of the input/output sections.
    publishDir "$params.outdir/", mode: 'copy', overwrite: true

    input:
    tuple val(meta), file(image)

    output:
    tuple val(meta), file('*.ome.tiff')

    // Without this label the placeholder command below was a stray string
    // inside the output section and was never used as the stub.
    stub:
    """
    touch image_cleaned.ome.tiff
    """

    script:
    """
    clean_tiff.py $image --suffix $params.suffix
    """
}
19 changes: 19 additions & 0 deletions modules/qptiff2ometiff.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Converts one input image to OME-TIFF in two steps: bioformats2raw writes
// `raw_dir`, then raw2ometiff turns `raw_dir` into "<simpleName>.ome.tiff".
process QPTIFF2OMETIFF {
// Tasks are tagged with the sample id carried in the meta map.
tag {"$meta.id"}
label "process_medium"
container 'ghcr.io/sage-bionetworks-workflows/nf-artist:latest'
input:
tuple val(meta), file(image)
output:
tuple val(meta), file("${image.simpleName}.ome.tiff")
// Stub: creates placeholders with the same names instead of converting.
stub:
"""
touch raw_dir
touch "${image.simpleName}.ome.tiff"
"""
// `-s 0` is passed to bioformats2raw; the README describes this step as
// converting only the first series (the full resolution image).
script:
"""
bioformats2raw $image 'raw_dir' -s 0
raw2ometiff 'raw_dir' "${image.simpleName}.ome.tiff"
"""
}
37 changes: 37 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// nextflow.config

// Enable Docker for process execution.
docker.enabled = true

profiles {
// `test` and `sage` load their settings from separate files under conf/.
test { includeConfig 'conf/test.config'}
sage { includeConfig 'conf/sage.config'}
// `tower`: CPU and memory requests are multiplied by task.attempt, so each
// retry asks for more than the previous attempt.
tower {
process {
// Process-scope settings; the label selectors below set their own values.
cpus = {1 * task.attempt}
memory = {2.GB * task.attempt}
maxRetries = 3
errorStrategy = {task.attempt <= 2 ? 'retry' : 'ignore' }
withLabel: process_low {
cpus = {1 * task.attempt}
memory = {2.GB * task.attempt}
maxRetries = 3
errorStrategy = {task.attempt <= 2 ? 'retry' : 'ignore' }
}
// NOTE(review): process_medium and process_high retry while attempt <= 3,
// whereas the settings above switch to 'ignore' after attempt 2. Confirm the
// difference is intended.
withLabel: process_medium {
cpus = {4 * task.attempt}
memory = {8.GB * task.attempt}
maxRetries = 3
errorStrategy = {task.attempt <= 3 ? 'retry' : 'ignore' }
}
withLabel: process_high {
cpus = {8 * task.attempt}
memory = {16.GB * task.attempt}
maxRetries = 3
errorStrategy = {task.attempt <= 3 ? 'retry' : 'ignore' }
}
}
}
}



14 changes: 14 additions & 0 deletions subworkflows/convert.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
include { QPTIFF2OMETIFF } from '../modules/qptiff2ometiff.nf'

/*
 * CONVERT: run QPTIFF2OMETIFF over the incoming channel and expose the
 * process output as `converted`.
 */
workflow CONVERT {
    take:
    images

    main:
    QPTIFF2OMETIFF(images)

    emit:
    converted = QPTIFF2OMETIFF.out
}
15 changes: 15 additions & 0 deletions subworkflows/deid.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
include { clean_ome } from "../modules/clean_ome.nf"
include { clean_tiff } from "../modules/clean_tiff.nf"


/*
 * DEID: pass each image through clean_ome and then clean_tiff, and expose
 * the result as `cleaned`.
 */
workflow DEID {
    take:
    images

    main:
    clean_ome(images)
    clean_tiff(clean_ome.out)

    emit:
    cleaned = clean_tiff.out
}
24 changes: 24 additions & 0 deletions subworkflows/samplesheet_split.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
 * SAMPLESHEET_SPLIT: read a CSV samplesheet with a header row and emit one
 * [meta, image] pair per row as `images`.
 *
 * take: samplesheet - path of the CSV file; rows need an `image` column and
 *                     may carry an `id` column.
 * emit: images      - [meta, image] where meta.id is the row's `id`, or the
 *                     image file's simple name when `id` is empty or absent.
 */
workflow SAMPLESHEET_SPLIT {
    take:
    samplesheet

    main:
    Channel
        .fromPath(samplesheet)
        .splitCsv(header: true, sep: ',')
        // Make meta map from the samplesheet
        .map { row ->
            def meta = [:]
            meta.id = row.id ?: file(row.image).simpleName
            // `def` keeps this variable local to the closure. Without it the
            // name is a script-level global shared by every invocation.
            def image = file(row.image)
            [meta, image]
        }
        .set { images }

    emit:
    images
}
12 changes: 12 additions & 0 deletions workflows/vectra2htan.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
include { SAMPLESHEET_SPLIT } from '../subworkflows/samplesheet_split.nf'
include { CONVERT } from '../subworkflows/convert.nf'
// include { GET_METADATA } from '../subworkflows/get_metadata.nf'
include { DEID } from '../subworkflows/deid.nf'

/*
 * VECTRA2HTAN: split the samplesheet given by params.input into images,
 * convert them, then de-identify the converted files.
 */
workflow VECTRA2HTAN {
    SAMPLESHEET_SPLIT(params.input)
    CONVERT(SAMPLESHEET_SPLIT.out.images)
    // GET_METADATA( CONVERT.out.converted )
    DEID(CONVERT.out.converted)
}

0 comments on commit 73ecfd8

Please sign in to comment.