forked from sara-nl/iRODS-RDM-HPC-course
-
Notifications
You must be signed in to change notification settings - Fork 4
/
jobscript
38 lines (26 loc) · 1.51 KB
/
jobscript
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/bin/bash
# SLURM job script: select data objects in iRODS by metadata, download them to
# the job's scratch space, run a word count over them, upload the result to
# iRODS and annotate it with metadata.
#
# Set job requirements.
# We are going to use only one node (-N 1) and the express queue (-p short),
# and we set the time limit to 4 minutes (-t 4:00).
#SBATCH -p short
#SBATCH -N 1
#SBATCH -t 4:00

# Stop at the first failing command, on any unset variable (e.g. TMPDIR or
# SLURM_JOBID when run outside of SLURM), and when any stage of a pipeline
# fails. This prevents uploading and annotating a results file that was built
# from missing or partial input.
set -euo pipefail

# Make sure you have logged in to your iRODS zone prior to job submission.
# iRODS creates an irodsA file which is subsequently used by the worker nodes.

# Move to the git repository in your home directory, which might hold the
# processing script.
cd "$HOME/iRODS-RDM-HPC-course" || {
  echo "cannot cd to $HOME/iRODS-RDM-HPC-course" >&2
  exit 1
}

# Target collection in iRODS. The value may contain spaces, so every use of
# it below is quoted.
rodscoll='/surfZone1/home/irods-user1/YOUR OUTPUT COLLECTION'

# Per-job working directories on scratch; the job id keeps them unique.
inputdir="$TMPDIR/inputdat$SLURM_JOBID"
outputdir="$TMPDIR/outputdat$SLURM_JOBID"
mkdir -p -- "$inputdir" "$outputdir"

# Search for data objects with iquest (one "collection/name" path per line),
# then get these files from iRODS and store them under scratch.
iquest "%s/%s" "select COLL_NAME, DATA_NAME where META_DATA_ATTR_NAME = 'author' and META_DATA_ATTR_VALUE = 'Lewis Carroll'" \
  | parallel iget {} "$inputdir"

# Perform the word count analysis: lower-case all input, then count every
# whitespace-separated word and print "word count" pairs.
resultsfile="results${SLURM_JOBID}.dat"
cat -- "$inputdir"/* \
  | tr '[:upper:]' '[:lower:]' \
  | awk '{for(i=1;i<=NF;i++) count[$i]++} END {for(j in count) print j, count[j]}' \
  > "$outputdir/$resultsfile"

# Put results back into iRODS. The full destination path is given explicitly
# so that the upload and the metadata commands below address the same object.
resultsobj="$rodscoll/$resultsfile"
iput "$outputdir/$resultsfile" "$resultsobj"

# Add metadata provenance.
# Possibly add a metadata annotation script call here: either locally via a
# script file or rule file, or server side via installed rules. The last is
# preferred but also more difficult to implement.
imeta add -d "$resultsobj" 'somekey' 'somevalue'
imeta ls -d "$resultsobj"