forked from kew-myco/its_glue
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpipe_two.cluster_classify.sh
executable file
·80 lines (67 loc) · 2.75 KB
/
pipe_two.cluster_classify.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env bash
# #
# .__ __ .__ #
# |__|/ |_ ______ ____ | | __ __ ____ #
# | \ __\/ ___/ / ___\| | | | \_/ __ \ #
# | || | \___ \ / /_/ > |_| | /\ ___/ #
# |__||__| /____ > \___ /|____/____/ \___ > #
## \/ /_____/ \/ ##
### ###
#### Author : Alex Byrne ####
#### Contact : [email protected] ####
#### ####
#### See its_glue github for usage details. ####
#### ####
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Abort as soon as any command exits with a non-zero status.
set -e

# Start every option from a known-empty value. Without this, variables of the
# same name exported by the calling environment would silently stand in for
# flags that were never passed and slip past the empty-value checks below.
cid=''
db=''
fasta=''
out_dir=''
sco=''

# Parse the command-line flags. Every flag takes an argument:
#   -c  clustering identity (passed to vsearch --id)
#   -d  reference database (passed to vsearch --db)
#   -f  input FASTA file
#   -o  output directory
#   -s  sintax bootstrap cutoff (passed to vsearch --sintax_cutoff)
while getopts d:f:o:c:s: flag
do
  case "${flag}" in
    c) cid=${OPTARG} ;;
    d) db=${OPTARG} ;;
    f) fasta=${OPTARG} ;;
    o) out_dir=${OPTARG} ;;
    s) sco=${OPTARG} ;;
    *)
      # Unknown flag or missing argument: report on stderr, like every other
      # diagnostic in this script, and stop.
      echo "ERROR: invalid flag! Have you read the README?" >&2
      exit 1
      ;;
  esac
done
# TODO: make flags c and x do more things
# NOTE(review): the getopts string (d:f:o:c:s:) parses no -x flag; confirm
# which flag the TODO above refers to.

# The reference database (-d) and the input FASTA (-f) are mandatory:
# stop with an error on stderr when either one is empty.
if [[ -z "${db}" ]] || [[ -z "${fasta}" ]]; then
  printf '%s\n' "ERROR: flags -d and -f require arguments." >&2
  exit 1
fi
# Fill in defaults for the optional flags.

# -o: write into the current directory when no output directory was given.
: "${out_dir:=.}"

# -c: clustering identity; warn on stderr when falling back to the default.
if [[ -z "${cid}" ]]; then
  cid=0.97
  printf '%s\n' "warning: no clustering id provided (-c), defaulting to 0.97" >&2
fi

# -s: sintax bootstrap cutoff; warn on stderr when falling back to the default.
if [[ -z "${sco}" ]]; then
  sco=0.6
  printf '%s\n' "warning: no sintax bootstrap cutoff provided (-s), defaulting to 0.6" >&2
fi
# Activate the project's conda environment.
# `conda activate` is only available after conda's shell hook has been
# sourced, so locate the conda installation and load conda.sh first.
if ! CONDA_BASE=$(conda info --base); then
  echo 'ERROR: could not query conda (conda info --base failed). Is conda installed and on PATH?' >&2
  exit 1
fi
# shellcheck source=/dev/null
source "$CONDA_BASE/etc/profile.d/conda.sh"

# The environment is addressed by path, relative to the current working
# directory (./seq_conda).
if conda activate ./seq_conda ; then
  echo "activated conda env"
else
  # Failure diagnostics belong on stderr, like the other errors in this script.
  echo 'conda environment not set up! Have you run CREATE_ENV.sh?' >&2
  exit 1
fi
# Output files, all written inside the chosen output directory.
otu_table="${out_dir}/cluster_membership.tsv"
sintax_table="${out_dir}/sintax_classifications.tsv"
merged_table="${out_dir}/clusters_with_taxonomy.csv"

# Step 1: vsearch cluster to OTUs.
#   --id          : pairwise identity needed to match an OTU, taken from -c
#                   (default 0.97, i.e. 97%). Not ideal, but certainly standard.
#   --sizeorder   : abundance trumps distance for ties.
#   --maxaccepts  : number of decent hits to look for before making a decision
#                   (default 1!). NOTE: this option is not passed below, so
#                   vsearch's own default applies.
cluster_args=(
  --cluster_size "${fasta}"
  --otutabout "${otu_table}"
  --id "${cid}"
  --sizeorder
  --clusterout_sort
)
vsearch "${cluster_args[@]}"

# Step 2: run vsearch --sintax on the same FASTA against the reference
# database (-d), using the bootstrap cutoff from -s.
sintax_args=(
  --sintax "${fasta}"
  --db "${db}"
  --sintax_cutoff "${sco}"
  --tabbedout "${sintax_table}"
)
vsearch "${sintax_args[@]}"

# Step 3: hand both tables to the organiser script, with the CSV path as its
# third argument. The script is not shown here; presumably it merges the two
# tables into that CSV. Its path is relative to the current working directory.
python3 ./scripts/modules/cluster_classify_organise_output.py \
  "${sintax_table}" \
  "${otu_table}" \
  "${merged_table}"