-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmake_otu_table.sh
executable file
·148 lines (131 loc) · 6 KB
/
make_otu_table.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/bin/bash
# Create a private working directory for the intermediate and output files.
# Abort right away when that fails: with an empty $outlocation every later
# path built from it would resolve directly under "/".
outlocation=$(mktemp -d /data/files/XXXXXX) || {
  echo "ERROR: could not create a temporary directory under /data/files" >&2
  exit 1
}
# Remove the working directory on every exit path, including the early
# "exit 1" error exits further down in this script.
trap 'rm -rf -- "$outlocation"' EXIT
# Directory that holds this script (symlinks resolved); make_otu_table.py is
# started from this directory.
SCRIPTDIR=$(dirname "$(readlink -f "$0")")
# sanity check
#printf "Conda env: $CONDA_DEFAULT_ENV\n"
#printf "Outlocation: $outlocation\n"
#printf "Python version: $(python --version | awk '{print $2}')\n"
#printf "Biopython version: $(conda list | egrep biopython | awk '{print $2}')\n"
#printf "Usearch version: $(usearch11 | head -n1 | awk '{print $2}' | tr -d ',')\n"
#printf "Vsearch version: $(conda list | egrep vsearch | awk '{print $2}')\n"
#printf "Dada2 version: $(conda list | egrep dada2 | awk '{print $2}')\n"
#printf "Unzip version: $(unzip -v | head -n1 | awk '{print $2}')\n"
#printf "Bash version: ${BASH_VERSION}\n"
#printf "SCRIPTDIR: $SCRIPTDIR\n\n"
#######################################
# Start make_otu_table.py with the options that belong to the selected
# clustering method.
# Globals:
#   SCRIPTDIR   (read) directory containing make_otu_table.py
#   outlocation (read) working directory, passed as -of
# Arguments (the script's own positional parameters, forwarded as "$@"):
#   $1   passed as -i
#   $2   passed as -t
#   $3   clustering method, passed as -c and used to pick the extra options
#   $9, $10, $11  method-specific values (see the case arms below)
# Returns:
#   The exit status of python, or 0 when $3 names no known method (in that
#   case nothing is run, as before).
#######################################
run_make_otu_table() {
  local -a common_args=(-i "$1" -t "$2" -c "$3" -of "$outlocation")
  local py_script="$SCRIPTDIR/make_otu_table.py"

  case "$3" in
    cluster_otus)
      python "$py_script" "${common_args[@]}" -abundance_minsize "${9}"
      ;;
    dada2)
      python "$py_script" "${common_args[@]}"
      ;;
    unoise | vsearch_unoise | vsearch_unoise_no_chimera_check)
      python "$py_script" "${common_args[@]}" \
        -a "${9}" -abundance_minsize "${10}"
      ;;
    vsearch | vsearch_no_chimera_check)
      python "$py_script" "${common_args[@]}" \
        -cluster_id "${9}" -abundance_minsize "${10}" -cluster_size "${11}"
      ;;
  esac
}
run_make_otu_table "$@"
# Output files

#######################################
# Move a result file to the destination requested by the caller.
# Arguments:
#   $1 - file to move
#   $2 - destination path; when empty the output was not requested and
#        nothing is done
# Returns:
#   0 when no destination was given, otherwise the exit status of mv.
#######################################
move_to_output() {
  local src=$1 dest=$2
  if [ -z "$dest" ]; then
    return 0
  fi
  # No existence test after the move: a successful mv removes the source,
  # so such a test could never succeed.
  mv -- "$src" "$dest"
}

# $4: destination of the zip archive, $5: destination of the log file.
move_to_output "$outlocation/all_output.zip" "$4"
move_to_output "$outlocation/log.log" "$5"
# Output Otu sequences as fasta file

#######################################
# Publish the Otu sequences as a single-line FASTA file whose ">Otu<n>"
# labels are zero-padded to a common width and sorted, so that they are
# compatible with the Otu table.
# Arguments:
#   $1 - working directory containing otu_sequences.fa
#   $2 - destination file
# Outputs:
#   Error messages on stderr.
# Returns:
#   1 when the sequences are missing or empty, or when no ">Otu<number>"
#   label is found; otherwise the exit status of the final copy.
#######################################
export_otu_fasta() {
  local workdir=$1 dest=$2
  local raw_fa="$workdir/otu_sequences.fa"
  local flat_fa="$workdir/otu_sequences_DG.fa"
  local padded_fa="$workdir/otu_sequences_DG2.fa"
  local max_length otu_digits zero_pad

  # Deliver the unmodified sequences first; they are replaced by the
  # reformatted file at the end of this function.
  cp -- "$raw_fa" "$dest"

  # Convert interleaved or multiline fasta to singleline. The closing
  # newline is only written when there was input, so an empty input gives
  # an empty file and is caught by the emptiness check below.
  awk '/^>/ { if (NR > 1) print ""; printf("%s\n", $0); next }
       { printf("%s", $0) }
       END { if (NR > 0) printf("\n") }' < "$raw_fa" > "$flat_fa"
  rm -- "$raw_fa"

  #------------------------------------------------------------------
  # Adjust fasta headers to make them compatible with Otu-table.
  # Files seem to be sorted, but sort anyway, just in case.
  #------------------------------------------------------------------
  # If the file is missing or empty something went wrong and the input data
  # might be incorrect.
  if [ ! -e "$flat_fa" ]; then
    echo "ERROR: /otu_sequences_DG.fa file does not exist. Please contact your Galaxy admin" >&2
    return 1
  fi
  if [ ! -s "$flat_fa" ]; then
    echo "ERROR: /otu_sequences_DG.fa file is empty. Please contact your Galaxy admin" >&2
    return 1
  fi

  # Length of the longest ">Otu<n>" label (first field of a header line);
  # 0 when there is no such header.
  max_length=$(awk '/^>Otu/ { if (length($1) > max) max = length($1) }
                    END { print max + 0 }' "$flat_fa")
  # Max number of digits of an Otu label: subtract the 4 characters ">Otu".
  otu_digits=$(( max_length - 4 ))
  # A value below 1 must never reach the padding step.
  if (( otu_digits < 1 )); then
    echo "ERROR: otu_digits is less than 1! Please contact your Galaxy admin" >&2
    return 1
  fi
  # A string of $otu_digits zeros.
  printf -v zero_pad '%0*d' "$otu_digits" 0

  # Pad every label with zeros, then strip the surplus so that exactly
  # $otu_digits digits (or more, for longer numbers) remain. Both
  # expressions are anchored to the ">Otu" label so that other numbers on
  # a header line are left alone. Header and sequence are joined onto one
  # line for sorting and split again afterwards.
  sed "s/^\(>Otu\)\([0-9]\)/\1${zero_pad}\2/; s/^\(>Otu\)0*\([0-9]\{${otu_digits},\}\)/\1\2/" "$flat_fa" \
    | paste - - \
    | sort -n \
    | tr '\t' '\n' > "$padded_fa"
  rm -- "$flat_fa"
  cp -- "$padded_fa" "$dest"
}

# $6: destination of the Otu sequences; empty when not requested.
if [ -n "$6" ]; then
  export_otu_fasta "$outlocation" "$6" || exit 1
fi
# Output Otu table

#######################################
# Publish the Otu table with its "Otu<n>" row labels zero-padded to a
# common width and the rows sorted.
# Arguments:
#   $1 - working directory containing otutab.txt
#   $2 - destination file
# Outputs:
#   Error messages on stderr.
# Returns:
#   1 when the table is missing or empty, or when no "Otu<number>" label
#   is found; otherwise the exit status of the final copy.
#######################################
export_otu_table() {
  local workdir=$1 dest=$2
  local raw_tab="$workdir/otutab.txt"
  local padded_tab="$workdir/otutab_DG.txt"
  local max_length otu_digits zero_pad

  # Deliver the unmodified table first; it is replaced by the reformatted
  # table at the end of this function.
  cp -- "$raw_tab" "$dest"

  #------------------------------------------------------------------
  # adjust Otu label format and sort
  #------------------------------------------------------------------
  # If the file is missing or empty something went wrong and the input data
  # might be incorrect.
  if [ ! -e "$raw_tab" ]; then
    echo "ERROR: otutab.txt file does not exist. Please contact your Galaxy admin" >&2
    return 1
  fi
  if [ ! -s "$raw_tab" ]; then
    echo "ERROR: otutab.txt file is empty. Please contact your Galaxy admin" >&2
    return 1
  fi

  # Length of the longest "Otu<n>" label (first field of a row); 0 when
  # there is no such row.
  max_length=$(awk '/^Otu/ { if (length($1) > max) max = length($1) }
                    END { print max + 0 }' "$raw_tab")
  # Max number of digits of an Otu label: subtract the 3 characters "Otu".
  otu_digits=$(( max_length - 3 ))
  # A value below 1 must never reach the padding step.
  if (( otu_digits < 1 )); then
    echo "ERROR: otu_digits is less than 1! Please contact your Galaxy admin" >&2
    return 1
  fi
  # A string of $otu_digits zeros.
  printf -v zero_pad '%0*d' "$otu_digits" 0

  # Pad every label with zeros, then strip the surplus so that exactly
  # $otu_digits digits (or more, for longer numbers) remain. Both
  # expressions are anchored to the "Otu" label at the start of the line,
  # so counts and the names in the header line are never rewritten.
  # NOTE(review): no line starts with a number, so "sort -n" orders the
  # rows by its fallback whole-line comparison, which follows the locale.
  sed "s/^\(Otu\)\([0-9]\)/\1${zero_pad}\2/; s/^\(Otu\)0*\([0-9]\{${otu_digits},\}\)/\1\2/" "$raw_tab" \
    | sort -n > "$padded_tab"
  rm -- "$raw_tab"
  cp -- "$padded_tab" "$dest"
}

# $7: destination of the Otu table; empty when not requested.
# It is not $7 that gets changed but the actual file (###.dat) it points to.
if [ -n "$7" ]; then
  export_otu_table "$outlocation" "$7" || exit 1
fi
# Output Biom file

#######################################
# Move the Biom file to its destination when a destination was requested
# and the file was actually produced.
# Arguments:
#   $1 - Biom file to move
#   $2 - destination path; may be empty
# Returns:
#   0 when there was nothing to move, otherwise the exit status of mv.
#######################################
move_biom_if_present() {
  local src=$1 dest=$2
  if [ -z "$dest" ] || [ ! -f "$src" ]; then
    return 0
  fi
  mv -- "$src" "$dest"
}

# $8: destination of the Biom file; empty when not requested.
move_biom_if_present "$outlocation/bioom.json" "$8"
# Remove the temporary working directory. The path is quoted so it is
# passed to rm as one argument, and the removal is skipped when the
# variable is empty so rm never runs without a concrete path.
if [ -n "$outlocation" ]; then
  rm -rf -- "$outlocation"
fi
echo "script finished"