forked from mkirsche/nCovIllumina
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpipeline.sh
executable file
·157 lines (118 loc) · 4.64 KB
/
pipeline.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/bin/bash
### Pipeline for processing Illumina reads ###
## Set up parameters
# Get run directory
# BINDIR is the folder that contains this script, stored as an ABSOLUTE path.
# The pipeline changes directory later on, so a relative BINDIR (which the
# plain `readlink` / "$0" fallbacks can yield) would stop pointing at the
# script folder after that.
_self=$0
if _resolved=$(readlink -f "$_self" 2>/dev/null) && [ -n "$_resolved" ]; then
    # readlink -f available: fully canonicalised path of the script
    _self=$_resolved
elif _resolved=$(readlink "$_self" 2>/dev/null) && [ -n "$_resolved" ]; then
    # readlink without -f only reports the target of a symlink, and that
    # target may be relative to the directory holding the link
    case $_resolved in
        /*) _self=$_resolved ;;
        *)  _self=$(dirname "$_self")/$_resolved ;;
    esac
else
    # Not a symlink (or readlink unusable): fall back to $0 itself,
    # turning backslashes into forward slashes
    _self=$(printf '%s\n' "$_self" | sed -e 's,\\,/,g')
fi
BINDIR=$(cd -- "$(dirname "$_self")" >/dev/null 2>&1 && pwd)
if [ -z "$BINDIR" ]; then
    # Directory could not be entered; keep the unresolved dirname so the
    # bashrc check further down can report the problem
    BINDIR=$(dirname "$_self")
fi
unset _self _resolved
#------------------------------------------------------------------------------
# Print the command-line help text to stdout, one option per line.
usage()
{
    printf '%s\n' \
        "usage: $0 [options]" \
        'OPTIONS:' \
        '-h show this message' \
        '-i path to input folder containing FASTQs' \
        "-o path to folder where output 'results' folder will be placed" \
        '-c path to config file for this run'
}
#------------------------------------------------------------------------------
# set default values here
CONFIG=/opt/nCovIllumina/config/illumina.txt
# parse input arguments
#   -h          print usage and exit (exit status 1, kept as it always was)
#   -i <dir>    sets INPUTDIR
#   -o <dir>    sets OUTPUTDIR
#   -c <file>   sets CONFIG, replacing the default above
# getopts reports an unknown option or a missing option argument as '?';
# that case must end with a non-zero status. A bare `exit` would reuse the
# status of `usage` (0) and make a bad command line look like success.
while getopts "hi:o:c:" OPTION
do
    case "$OPTION" in
        h) usage; exit 1 ;;
        i) INPUTDIR=$OPTARG ;;
        o) OUTPUTDIR=$OPTARG ;;
        c) CONFIG=$OPTARG ;;
        *) usage >&2; exit 1 ;;
    esac
done
# The bashrc that sits next to this script is sourced further down; stop
# early if it is missing or empty
if [[ ! -s "$BINDIR/bashrc" ]]; then
    echo "Error: BINDIR ($BINDIR) does not contain the expected bashrc file." >&2
    usage >&2
    exit 2
fi
# -i is mandatory: it is the only source of reads and also the fallback for
# the output location
if [[ -z "$INPUTDIR" ]]; then
    echo "Error: no input folder given (-i)." >&2
    usage >&2
    exit 1
fi
if [[ ! -d "$INPUTDIR" ]]; then
    echo "Error: input folder ($INPUTDIR) is not a directory." >&2
    exit 1
fi
# if no output folder was given, write next to the input
if [[ -z "$OUTPUTDIR" ]]; then
    OUTPUTDIR="$INPUTDIR"
fi
if ! mkdir -p "$OUTPUTDIR"; then
    echo "Error: cannot create output folder ($OUTPUTDIR)." >&2
    exit 1
fi
if [[ ! -f "$CONFIG" || ! -r "$CONFIG" ]]; then
    echo "Error: config file ($CONFIG) is not a readable file." >&2
    exit 1
fi
# The script changes into OUTPUTDIR before using any of these paths, so
# relative values would be resolved against the wrong directory. Pin all
# three to absolute paths while the caller's working directory is still
# current.
INPUTDIR=$(cd -- "$INPUTDIR" >/dev/null && pwd) || exit 1
OUTPUTDIR=$(cd -- "$OUTPUTDIR" >/dev/null && pwd) || exit 1
_config_dir=$(cd -- "$(dirname "$CONFIG")" >/dev/null && pwd) || exit 1
# ${..%/} avoids a doubled slash when the config sits directly in /
CONFIG="${_config_dir%/}/$(basename "$CONFIG")"
unset _config_dir
#------------------------------------------------------------------------------
# Load parameters from config
source "$BINDIR/bashrc"
# Set up script parameters based on config setup
# Every reference file lives under GENOMEDIR/PATHOGENREF/PRIMERVERSION.
# NOTE(review): those three names are not assigned in this script; presumably
# the bashrc sourced above provides them - confirm.
_refbase=$GENOMEDIR/$PATHOGENREF/$PRIMERVERSION
# REFERENCE is stored as a wildcard pattern: an assignment does not expand
# globs, so the match only happens where $REFERENCE is later used unquoted
REFERENCE=$_refbase/*.reference.fasta
GENES=$_refbase/genes.gff3
# postfiltering parameters
GLOBALDIVERSITY=$_refbase/approx_global_diversity.tsv # observed global variants
KEYPOS=$_refbase/key_positions.txt # clade-defining positions
CASEDEFS=$_refbase/variant_case_definitions.csv # types of variant annotations
AMPLICONS=$_refbase/amplicons.tsv # amplicons file
REF_GB=$_refbase/reference_seq.gb
PANGOLIN_DATA=$_refbase/pangoLEARN/pangoLEARN/data
NEXTSTRAIN_CLADES=$_refbase/clades.tsv
SNPEFF_CONFIG=$_refbase/snpEff.config
unset _refbase
# Work from the output folder; abort instead of carrying on in whatever
# directory the script happened to be started from
if ! mkdir -p "$OUTPUTDIR" || ! cd "$OUTPUTDIR"; then
    echo "Error: cannot change into output folder ($OUTPUTDIR)." >&2
    exit 1
fi
# The run config is sourced last, so anything it assigns overrides the
# values set above
if [[ ! -r "$CONFIG" ]]; then
    echo "Error: cannot read config file ($CONFIG)." >&2
    exit 1
fi
source "$CONFIG"
## Load submodule
# The VariantValidator sources are fetched on demand when its README is not
# readable under BINDIR
if [ ! -r "$BINDIR/VariantValidator/README.md" ]
then
    echo 'Downloading VariantValidator submodule'
    # Run git inside a subshell so the directory change is local to it. The
    # pipeline's own working directory is left untouched, which also avoids
    # having to cd back (that return trip went to the wrong place whenever
    # OUTPUTDIR was a relative path).
    ( cd "$BINDIR" && git submodule update --init --recursive ) \
        || echo 'Warning: could not update the VariantValidator submodule' >&2
fi
# Compile the VariantValidator sources; the glob stays outside the quotes so
# it still expands
javac "$BINDIR"/VariantValidator/src/*.java
#------------------------------------------------------------------------------
## Filter reads by length
# Filtered reads are written to a 'filteredreads' folder inside OUTPUTDIR
FILTEREDINPUTDIR="$OUTPUTDIR/filteredreads"
# Path arguments are quoted so folders containing spaces stay one argument.
# The two length limits are left unquoted on purpose: an unset limit is then
# omitted from the argument list exactly as before.
# NOTE(review): MIN_READ_LENGTH / MAX_READ_LENGTH are not set in this script;
# presumably they come from the sourced bashrc or run config - confirm.
"$BINDIR/src/filterreads.sh" "$INPUTDIR" "$FILTEREDINPUTDIR" "$BINDIR" $MIN_READ_LENGTH $MAX_READ_LENGTH
#------------------------------------------------------------------------------
# Run iVar pipeline
# This whole stage is skipped when OUTPUTDIR already has a results folder
if [ ! -d "$OUTPUTDIR/results" ]
then
    mkdir -p "$OUTPUTDIR/results"
    # keep a copy of the config used for this run next to the results
    cp "$CONFIG" "$OUTPUTDIR/results"
    echo 'Getting ivar config'
    javac "$BINDIR/src/ParseIvarConfig.java"
    # ParseIvarConfig prints the extra command-line arguments for ivar.sh
    extraargs=$(java -cp "$BINDIR/src" ParseIvarConfig "$CONFIG")
    echo 'Running ivar'
    conda activate artic-ncov2019-illumina
    # $extraargs is intentionally unquoted: it must be split into the
    # individual arguments that ParseIvarConfig printed
    "$BINDIR/src/ivar.sh" "$FILTEREDINPUTDIR" $extraargs
fi
#------------------------------------------------------------------------------
## Call variants
# Load necessary conda environment
conda activate ncov_illumina
# Call variants
# $REFERENCE stays unquoted: earlier in this script it is assigned a wildcard
# pattern (*.reference.fasta), and the shell has to expand that pattern here.
# All other arguments are quoted so each is passed as exactly one word.
"$BINDIR/src/callvariants.sh" "$OUTPUTDIR" "$BINDIR" $REFERENCE "$GENES"
#------------------------------------------------------------------------------
## Run postfiltering
# Arguments are positional, so each one is quoted: an empty or
# space-containing value can no longer shift the ones after it.
# $REFERENCE stays unquoted because it is assigned a wildcard pattern
# (*.reference.fasta) earlier in this script and must be expanded here.
"$BINDIR/src/run_postfilter.sh" "$OUTPUTDIR" "$BINDIR" "$NTCPREFIX" $REFERENCE "$GLOBALDIVERSITY" "$KEYPOS" "$CASEDEFS" "$AMPLICONS"
# run postfilter summary
python "$BINDIR/src/summarize_postfilter.py" --rundir "$OUTPUTDIR/results/postfilt"
#------------------------------------------------------------------------------
## Run SnpEff
# Every argument is quoted so it is handed over as exactly one positional
# parameter, even if it contains spaces.
# NOTE(review): DBNAME and NTCPREFIX are not set in this script; presumably
# they come from the sourced bashrc or run config - confirm.
"$BINDIR/src/run_snpEff.sh" "$OUTPUTDIR" "$BINDIR" "$SNPEFF_CONFIG" "$DBNAME" "$NTCPREFIX"
#------------------------------------------------------------------------------
## Run pangolin clades
# Fall back to a single thread when THREADS is unset or empty. The quoted
# [[ ]] test also copes with values that contain whitespace.
if [[ -z "$THREADS" ]]; then
    THREADS=1
fi
# Switch from the currently active conda environment to the pangolin one
conda deactivate
conda activate pangolin
"$BINDIR/src/run_pangolin.sh" "$OUTPUTDIR" "$BINDIR" "$THREADS" "$PANGOLIN_DATA" "$NTCPREFIX"
#------------------------------------------------------------------------------
## Run nextstrain clades
# Switch from the currently active conda environment to the nextstrain one
conda deactivate
conda activate nextstrain
# Arguments are quoted so each path is passed as a single positional parameter
"$BINDIR/src/run_nextstrain_clades.sh" "$OUTPUTDIR" "$BINDIR" "$REF_GB" "$NEXTSTRAIN_CLADES" "$NTCPREFIX"
#------------------------------------------------------------------------------
## Copy final results into final folder
# Both arguments are quoted so an output path containing spaces stays one word
"$BINDIR/src/final_cleanup.sh" "$OUTPUTDIR" "$NTCPREFIX"