Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Demo scripts to use iohub with SLURM job arrays #44

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions examples/iohub-slurm/batch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# Submit a scatter/gather pipeline: one SLURM array task per FOV (process.sh),
# then a single gather job (gather.sh) that runs after all tasks succeed.

module load anaconda
conda activate iohub

DATA_DIR=/hpc/projects/comp.micro/mantis/2023_05_10_PCNA_RAC1/0-crop-convert-zarr
INPUT_DATA=$DATA_DIR/sample.zarr

PROCESSED_DIR=/hpc/mydata/ziwen.liu
TEMP_DIR=$PROCESSED_DIR/demo_processed_sample_tmp
OUTPUT_DIR=$PROCESSED_DIR/demo_processed_sample.zarr
# -p: don't fail if the directory already exists (e.g. on a re-run)
mkdir -p "$TEMP_DIR"

# Summary line from iohub, e.g. "Positions:\t12"
POSITION_INFO=$(iohub info -v "$INPUT_DATA" | grep "Positions")

# BUGFIX: `${POSITION_INFO: -1}` kept only the LAST CHARACTER of the line,
# silently truncating any position count >= 10. Extract the full number.
POSITIONS=$(echo "$POSITION_INFO" | grep -oE '[0-9]+' | tail -n 1)

# Scatter: array indices 0..N-1, one per FOV.
SCATTER_JOB_ID=$(sbatch --parsable --array=0-$(($POSITIONS-1)) process.sh "$INPUT_DATA" "$TEMP_DIR")
echo $SCATTER_JOB_ID
# Gather: runs only if every scatter task exits 0 (afterok dependency).
GATHER_JOB_ID=$(sbatch --parsable -d afterok:$SCATTER_JOB_ID gather.sh "$TEMP_DIR" "$OUTPUT_DIR")
echo $GATHER_JOB_ID
# Clean up the intermediate per-FOV stores once the output is verified:
# rm -rf $TEMP_DIR
35 changes: 35 additions & 0 deletions examples/iohub-slurm/gather.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import argparse
import os
from glob import glob
import logging

from iohub.ngff import Plate, open_ome_zarr


def parse_args():
    """Parse CLI arguments for the gather step.

    Returns a namespace with ``input`` (temp FOVs path) and
    ``output`` (output store path).
    """
    parser = argparse.ArgumentParser()
    # (flag, help-text) pairs; both are plain string options.
    options = (
        ("--input", "temp fovs path"),
        ("--output", "output store path"),
    )
    for flag, description in options:
        parser.add_argument(flag, type=str, help=description)
    return parser.parse_args()


def main():
    """Gather per-FOV stores written by process.py into one HCS plate.

    Globs ``<input>/<row>/<col>/<fov>`` stores, keys each open FOV by its
    path relative to the input root, and assembles them into the output
    plate store.
    """
    args = parse_args()
    # Sort for a deterministic FOV ordering (raw glob order is
    # filesystem-dependent).
    fov_paths = sorted(glob(os.path.join(args.input, "*/*/*")))
    # BUGFIX: the original `path[len(args.input):]` key slicing depended on
    # whether --input carried a trailing separator (keys gained a leading "/"
    # or lost their first character). relpath is robust to both forms.
    # NOTE(review): assumes Plate.from_positions accepts "row/col/fov" keys
    # without a leading "/" — confirm against iohub's NGFF API.
    fovs = {
        os.path.relpath(path, args.input): open_ome_zarr(path)
        for path in fov_paths
    }
    logging.info(repr(fovs))
    _ = Plate.from_positions(args.output, fovs)


# Script entry point: gather the per-FOV stores named on the command line.
if __name__ == "__main__":
    main()
18 changes: 18 additions & 0 deletions examples/iohub-slurm/gather.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash
# SLURM batch script for the gather step: merges the per-FOV stores produced
# by the process.sh array job into a single output plate.

#SBATCH --job-name=demo_iohub_gather
#SBATCH --partition=cpu
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=4
#SBATCH --mem-per-cpu=12G
#SBATCH --output=./output/gather_%j.out
env | grep "^SLURM" | sort

#For saving the files stdouts
now=$(date '+%y-%m-%d')
logpath=./logs/$now
mkdir -p $logpath
logfile="$logpath/gather.out"

# CONSISTENCY FIX: activate the environment inside the job itself (as
# process.sh does) instead of relying on sbatch exporting the submitter's
# environment to the compute node.
module load anaconda/2022.05
conda activate iohub

# $1 = temp FOV directory, $2 = output store path (quoted against spaces).
python -u gather.py --input "$1" --output "$2" &> ${logfile}
45 changes: 45 additions & 0 deletions examples/iohub-slurm/process.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Demo for FOV processing on a single node. Only works for small arrays."""

import argparse
import os

import numpy as np

from iohub.ngff import ImageArray, open_ome_zarr


def parse_args():
    """Parse CLI arguments for the per-FOV processing step.

    Returns a namespace with ``input`` (input store path) and
    ``output`` (output store path).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", type=str, help="input store path")
    parser.add_argument("--output", type=str, help="output store path")
    return parser.parse_args()


def process(image: "ImageArray", fill: "int | None" = None):
    """Placeholder "processing": return a constant-filled array shaped like *image*.

    Parameters
    ----------
    image : ImageArray
        Input image; only its shape and dtype are used.
    fill : int, optional
        Constant to fill the output with. Defaults to the SLURM job ID so
        each array task's output is distinguishable, or 0 outside of SLURM.
    """
    job_id = os.environ.get("SLURM_JOB_ID")
    if fill is None:
        # BUGFIX: int(os.environ.get(...)) raised TypeError when the variable
        # was unset (e.g. running outside SLURM); fall back to 0.
        fill = int(job_id) if job_id is not None else 0
    # BUGFIX: message previously read "... with shape <job id>".
    print(f"processing image of shape {image.shape} in job {job_id}")
    return np.zeros_like(image) + fill


def main():
    """Process one FOV: read the input store, write the processed image out.

    Opens the input FOV read-only, creates the output FOV store with the
    same channel names and axes, and writes the processed "0" array.
    """
    args = parse_args()
    with open_ome_zarr(args.input) as src, open_ome_zarr(
        args.output,
        mode="w",
        layout="fov",
        channel_names=src.channel_names,
        axes=src.axes,
    ) as dst:
        dst["0"] = process(src["0"])


# Script entry point: process the single FOV named on the command line.
if __name__ == "__main__":
    main()
22 changes: 22 additions & 0 deletions examples/iohub-slurm/process.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# SLURM array-task script: each task processes one FOV of the input plate.
# $1 = input plate store root, $2 = temp output root.

#SBATCH --job-name=demo_iohub_scatter
#SBATCH --partition=cpu
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=4
#SBATCH --mem-per-cpu=12G
#SBATCH --output=./output/process_%A-%a.out
env | grep "^SLURM" | sort

#For saving the files stdouts
now=$(date '+%y-%m-%d')
logpath=./logs/$now
mkdir -p $logpath
logfile="$logpath/process_$SLURM_ARRAY_TASK_ID.out"

# The FOV handled by this task, addressed by its row/col/index path.
FOV_NAME=/0/0/$SLURM_ARRAY_TASK_ID

module load anaconda/2022.05
conda activate iohub
# BUGFIX: $logfile was computed (and its directory created) but never used;
# redirect the Python output into it, as gather.sh does.
python -u process.py --input "$1$FOV_NAME" --output "$2$FOV_NAME" &> ${logfile}