Initial development

ncihtan · Oct 10, 2023 · 73ecfd8 · 73ecfd8
1 parent d9c2ba4
commit 73ecfd8
Show file tree

Hide file tree

Showing 14 changed files with 1,527 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,6 @@
+.DS_Store
+.nextflow*
+work/
+outputs/
+data/
+test_samplesheet.csv
diff --git a/README.md b/README.md
@@ -1,2 +1,44 @@
 # nf-vectra-to-htan
- A NextFlow workflow to prepare Vectra mIF qptiff files for the HTAN DCC
+
+A NextFlow workflow to prepare Vectra mIF qptiff files for the HTAN DCC.
+
+This workflow takes qptiff files from Vectra mIF images and outputs OME-TIFF
+images suitable for submission to the HTAN DCC with the Imaging Level 2 template.
+
+It performs the following steps:
+
+- Converts the first series in the qptiff (the full resolution image) to OME-TIFF via `bioformats2raw` and `raw2ometiff`. Other images included in the qptiff (the `thumbnail`, `overview` and `label`) are discarded.
+- Removes `AcquisitionDate`, `StructuredAnnotations` and the experimenters' names and email addresses from the OME-XML.
+- Removes `DateTime` from the TIFF tags.
+
+It outputs a tiled, pyramidal, single scene OME-TIFF file.
+
+### Requirements
+
+- [NextFlow](https://nextflow.io/)
+- [Docker](https://docs.docker.com/engine/install/)
+
+### Usage
+
+```
+nextflow run ncihtan/nf-vectra-to-htan --input <path-to-samplesheet>
+```
+
+### Inputs
+
+Create a CSV samplesheet containing a column called `image` and, optionally, a column called `id`. For example:
+
+```
+image
+path/to/myimage.qptiff
+s3://mybucket/myimage.qptiff
+```
+
+### Outputs
+
+By default this outputs into a new directory called `outputs` in your current working directory.
+
+### Parameters
+
+- `outdir`: Directory for outputs (default: "`outputs`")
+- `suffix`: Suffix for output files (default: "`_htan`")
diff --git a/bin/clean_ometiff.py b/bin/clean_ometiff.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+import sys
+import tifftools
+import ome_types
+import os
+
+# Path of the OME-TIFF to clean, taken from the first command-line argument.
+# NOTE(review): this name shadows the built-in `input`.
+input = sys.argv[1]
+
+
+def split_all_ext(filename):
+    """Split *filename* into its stem and its complete, possibly compound, extension.
+
+    Extensions are peeled off with ``os.path.splitext`` until none is left, so
+    ``"image.ome.tiff"`` yields ``("image", ".ome.tiff")``. Only the final path
+    component is examined: dots in directory names (``"./image.tiff"``,
+    ``"run.1/image.tiff"``) and the leading dot of a hidden file stay in the
+    stem.
+
+    Returns:
+        A ``(stem, extensions)`` tuple; ``extensions`` is ``""`` when the file
+        name has no extension.
+    """
+    basename = filename
+    extensions = []
+    while True:
+        basename, ext = os.path.splitext(basename)
+        # Stop on an empty extension rather than on "no dot left in the
+        # string": the latter never terminates when a dot survives in a
+        # directory name or at the start of a hidden file.
+        if not ext:
+            break
+        extensions.append(ext)
+    return basename, "".join(reversed(extensions))
+
+
+# Report the "<stem>_cleaned<ext>" name derived from the input path. Nothing in
+# this script writes to that name; the input file itself is rewritten below.
+insert_string = "_cleaned"
+stem, extension = split_all_ext(input)
+new_filename = stem + insert_string + extension
+print(new_filename)
+
+# Load the OME metadata and blank out the fields to be removed.
+ome = ome_types.from_tiff(input)
+ome.structured_annotations.clear()
+for experimenter in ome.experimenters:
+    experimenter.email = None
+    experimenter.first_name = None
+    experimenter.last_name = None
+for image in ome.images:
+    image.acquisition_date = None
+
+# Store the edited OME-XML in the ImageDescription tag of the input file.
+cleaned_xml = ome_types.to_xml(ome)
+tifftools.tiff_set(
+    input,
+    overwrite=True,
+    setlist=[(tifftools.Tag.IMAGEDESCRIPTION, cleaned_xml)],
+)
diff --git a/bin/clean_tiff.py b/bin/clean_tiff.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+
+import argparse
+import tifftools
+import os
+
+import argparse
+
+# Command-line interface: the TIFF to process plus an optional output suffix.
+parser = argparse.ArgumentParser()

+# Positional path of the input file.
+parser.add_argument("input")
+# Text inserted between the file stem and its extension(s) in the output name.
+parser.add_argument("--suffix", default="_cleaned")

+args = parser.parse_args()
+
+
+def split_all_ext(filename):
+    """Split *filename* into its stem and its complete, possibly compound, extension.
+
+    Extensions are peeled off with ``os.path.splitext`` until none is left, so
+    ``"image.ome.tiff"`` yields ``("image", ".ome.tiff")``. Only the final path
+    component is examined: dots in directory names (``"./image.tiff"``,
+    ``"run.1/image.tiff"``) and the leading dot of a hidden file stay in the
+    stem.
+
+    Returns:
+        A ``(stem, extensions)`` tuple; ``extensions`` is ``""`` when the file
+        name has no extension.
+    """
+    basename = filename
+    extensions = []
+    while True:
+        basename, ext = os.path.splitext(basename)
+        # Stop on an empty extension rather than on "no dot left in the
+        # string": the latter never terminates when a dot survives in a
+        # directory name or at the start of a hidden file.
+        if not ext:
+            break
+        extensions.append(ext)
+    return basename, "".join(reversed(extensions))
+
+
+# Build "<stem><suffix><extensions>" and print it.
+stem, extensions = split_all_ext(args.input)
+new_filename = stem + args.suffix + extensions
+print(new_filename)
+
+# Write the input to the new file name with the DateTime tag unset.
+# overwrite=False is passed, so an existing output is presumably not replaced.
+tifftools.tiff_set(
+    args.input,
+    output=new_filename,
+    overwrite=False,
+    unset=["DateTime"],
+)
diff --git a/dump.txt b/dump.txt
diff --git a/main.nf b/main.nf
@@ -0,0 +1,43 @@
+#!/usr/bin/env nextflow
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    ncihtan/nf-vectra-to-htan
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Github : https://github.com/ncihtan/nf-vectra-to-htan
+----------------------------------------------------------------------------------------
+*/
+
+nextflow.enable.dsl = 2
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    WORKFLOW PARAMETER VALUES
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+params.outdir = 'outputs'   // Directory for outputs
+params.suffix = '_htan'     // Suffix for processed files
+
+// A samplesheet is mandatory: abort when it is missing, otherwise resolve it
+// to a file object.
+if (!params.input) {
+    exit 1, 'Input samplesheet not specified!'
+} else {
+    params.input = file(params.input)
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    NAMED WORKFLOWS FOR PIPELINE
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+include { VECTRA2HTAN } from './workflows/vectra2htan.nf'
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    RUN ALL WORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// Entry point: run the VECTRA2HTAN workflow included above.
+workflow {
+  VECTRA2HTAN ()
+}
diff --git a/modules/clean_ome.nf b/modules/clean_ome.nf
@@ -0,0 +1,15 @@
+// Cleans the OME metadata of an OME-TIFF with clean_ometiff.py.
+// The declared output is the same staged `image` file that the script receives.
+process clean_ome {
+  // Directives take the form `name value` (no `=`), as in the other modules.
+  container 'ghcr.io/ncihtan/nf-imagecleaner'
+  input:
+    tuple val(meta), file(image)
+  output:
+    tuple val(meta), file(image)
+  stub:
+  """
+  touch image_cleaned.ome.tiff
+  """
+  script:
+  """
+  clean_ometiff.py $image
+  """
+}
diff --git a/modules/clean_tiff.nf b/modules/clean_tiff.nf
@@ -0,0 +1,15 @@
+// Runs clean_tiff.py on an image and publishes the resulting *.ome.tiff file(s)
+// to params.outdir. The output file name carries params.suffix.
+process clean_tiff {
+  container 'ghcr.io/ncihtan/nf-imagecleaner'
+  // Directives must come before the input/output blocks; placed after
+  // `output:` this line would be read as an output declaration.
+  publishDir "$params.outdir/", mode: 'copy', overwrite: true
+  input:
+    tuple val(meta), file(image)
+  output:
+    tuple val(meta), file('*.ome.tiff')
+  // The stub script needs its own label so it is not parsed as part of `output:`.
+  stub:
+  """
+  touch image_cleaned.ome.tiff
+  """
+  script:
+  """
+  clean_tiff.py $image --suffix $params.suffix
+  """
+}
diff --git a/modules/qptiff2ometiff.nf b/modules/qptiff2ometiff.nf
@@ -0,0 +1,19 @@
+// Converts an input image to OME-TIFF in two steps: bioformats2raw writes
+// series 0 (`-s 0`) to an intermediate 'raw_dir', then raw2ometiff turns that
+// directory into "<simpleName>.ome.tiff".
+process QPTIFF2OMETIFF {
+  // Tag each task with the id stored in the meta map.
+  tag {"$meta.id"}
+  // Resource label matched by `withLabel: process_medium` in nextflow.config.
+  label "process_medium"
+  container 'ghcr.io/sage-bionetworks-workflows/nf-artist:latest'
+  input:
+      tuple val(meta), file(image) 
+  output:
+      tuple val(meta), file("${image.simpleName}.ome.tiff")
+  // Stub run: create empty placeholders instead of converting.
+  stub:
+  """
+  touch raw_dir
+  touch "${image.simpleName}.ome.tiff"
+  """
+  script:
+  """
+  bioformats2raw $image 'raw_dir' -s 0 
+  raw2ometiff 'raw_dir' "${image.simpleName}.ome.tiff"
+  """
+}
diff --git a/nextflow.config b/nextflow.config
@@ -0,0 +1,37 @@
+// nextflow.config
+
+// Enable Docker execution for the pipeline.
+docker.enabled = true

+// Named configuration profiles.
+profiles {
+    // `test` and `sage` load their settings from separate config files.
+    test { includeConfig 'conf/test.config'}
+    sage { includeConfig 'conf/sage.config'}
+    // `tower`: cpus and memory are multiplied by task.attempt, so each retry
+    // requests more resources than the previous attempt.
+    tower {
+        process {
+            // Defaults for processes without a matching label.
+            cpus = {1 * task.attempt}
+            memory = {2.GB * task.attempt}
+            maxRetries = 3
+            // Retry while task.attempt <= 2, otherwise ignore the failure.
+            errorStrategy = {task.attempt <= 2 ? 'retry' : 'ignore' }
+            withLabel: process_low {
+                cpus = {1 * task.attempt}
+                memory = {2.GB * task.attempt}
+                maxRetries = 3
+                errorStrategy = {task.attempt <= 2 ? 'retry' : 'ignore' }
+            }
+            // NOTE(review): the two labels below retry while task.attempt <= 3,
+            // unlike the `<= 2` used above; confirm the difference is intended.
+            withLabel: process_medium {
+                cpus = {4 * task.attempt}
+                memory = {8.GB * task.attempt}
+                maxRetries = 3
+                errorStrategy = {task.attempt <= 3 ? 'retry' : 'ignore' }
+            }
+            withLabel: process_high {
+                cpus = {8 * task.attempt}
+                memory = {16.GB * task.attempt}
+                maxRetries = 3
+                errorStrategy = {task.attempt <= 3 ? 'retry' : 'ignore' }
+            }
+        }
+    }
+}
+
+
+
diff --git a/subworkflows/convert.nf b/subworkflows/convert.nf
@@ -0,0 +1,14 @@
+include { QPTIFF2OMETIFF } from '../modules/qptiff2ometiff.nf'
+
+// Runs QPTIFF2OMETIFF on the incoming channel and emits its output as `converted`.
+workflow CONVERT {
+    take:
+    images
+
+    main:
+    QPTIFF2OMETIFF( images )
+
+    emit:
+    converted = QPTIFF2OMETIFF.out
+}
diff --git a/subworkflows/deid.nf b/subworkflows/deid.nf
@@ -0,0 +1,15 @@
+include { clean_ome } from "../modules/clean_ome.nf"
+include { clean_tiff } from "../modules/clean_tiff.nf"
+
+
+// Chains clean_ome and clean_tiff over the incoming channel and emits the
+// result as `cleaned`.
+workflow DEID {
+  take:
+  images
+
+  main:
+  clean_ome( images )
+  clean_tiff( clean_ome.out )
+
+  emit:
+  cleaned = clean_tiff.out
+}
diff --git a/subworkflows/samplesheet_split.nf b/subworkflows/samplesheet_split.nf
@@ -0,0 +1,24 @@
+// Reads the samplesheet CSV (comma-separated, with a header row) and emits one
+// [meta, image] pair per row as `images`.
+workflow SAMPLESHEET_SPLIT {
+    take:
+    samplesheet
+
+    main:
+    Channel
+        .fromPath(samplesheet)
+        .splitCsv(header: true, sep: ',')
+        // Make meta map from the samplesheet
+        .map { row ->
+            // `def` keeps the variable local to this closure; without it the
+            // name is global to the script and shared between invocations.
+            def image = file(row.image)
+            def meta = [:]
+            // Use the `id` column when it has a value, otherwise the image's simpleName.
+            meta.id = row.id ? row.id : image.simpleName
+            [meta, image]
+        }
+        .set { images }
+
+    emit:
+    images
+}
diff --git a/workflows/vectra2htan.nf b/workflows/vectra2htan.nf
@@ -0,0 +1,12 @@
+include { SAMPLESHEET_SPLIT } from '../subworkflows/samplesheet_split.nf'
+include { CONVERT } from '../subworkflows/convert.nf'
+// include { GET_METADATA } from '../subworkflows/get_metadata.nf'
+include { DEID } from '../subworkflows/deid.nf'
+
+// Full pipeline: split the samplesheet, convert each image, then run DEID on
+// the converted images.
+workflow VECTRA2HTAN {
+    SAMPLESHEET_SPLIT( params.input )
+    CONVERT( SAMPLESHEET_SPLIT.out.images )
+    // GET_METADATA( CONVERT.out.converted )
+    DEID( CONVERT.out.converted )
+}