-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtesting_the_classifiers_1.sh
110 lines (69 loc) · 2.82 KB
/
testing_the_classifiers_1.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/bin/bash
# Slurm array job that runs the kraken2, kaiju and centrifuge classifiers.
# Each array task processes one input file, chosen by its array task ID.
#
# Job name as shown in the queue.
#SBATCH --job-name=testing_classifiers
# One log file per array task: %A is the array's master job ID, %a the array task index.
#SBATCH --output=/storage/users/cferguson/jobs/final_run/testing_classifiers_%A_%a.log
# A single task per array element.
#SBATCH --ntasks=1
# 24 array elements with task IDs 1..24; the ID is used below as a line number in file_names.txt.
#SBATCH --array=1-24
# Memory is requested per CPU, so each array element gets 10 x 3GB.
#SBATCH --mem-per-cpu=3GB
# CPUs allocated to each array element; classifier thread counts should not exceed this.
#SBATCH --cpus-per-task=10
############ software
# Load the cluster's Anaconda module so that the `conda` command is on PATH.
module load apps/anaconda-4.7.12.tcl
# Stop here if conda is still unavailable: every classifier below is run from a
# conda environment, so carrying on would only produce "command not found" errors.
if ! command -v conda >/dev/null 2>&1; then
  echo "conda not found after 'module load apps/anaconda-4.7.12.tcl'" >&2
  exit 1
fi
# Install conda's bash integration into this non-interactive shell so that
# `conda activate` / `conda deactivate` work further down.
eval "$(conda shell.bash hook)"
############ defining variables
# Base directory; its entries are used as the ${y} path component by the
# classifier loops.
BaseDir=/storage/users/cferguson/rotation_2_data/Data_for_classifier_assessment/sym_data
# Which simulated dataset to use (directory name under each ${BaseDir}/<entry>).
data=SNPS_and_homopolymers

# Print the names (not the paths) of the non-hidden entries of directory $1,
# one per line, using a glob instead of parsing `ls` output.
list_entries() {
  local entry
  for entry in "$1"/*; do
    # An unmatched glob stays literal; skip it (but keep dangling symlinks).
    [[ -e "${entry}" || -L "${entry}" ]] || continue
    printf '%s\n' "${entry##*/}"
  done
}

# The array task ID selects which line of file_names.txt this task works on.
# Fail fast when it is unset: sed would otherwise be handed "q;d" and silently
# return line 1.
task_id="${SLURM_ARRAY_TASK_ID:?must be set - submit this script as a Slurm array job}"
# Accession/file name for this task: line ${task_id} of file_names.txt.
file_names=$( sed "${task_id}q;d" "${BaseDir}/../file_names.txt" )
if [[ -z "${file_names}" ]]; then
  # An empty name would make the classifiers run on a directory path.
  echo "no file name on line ${task_id} of ${BaseDir}/../file_names.txt" >&2
  exit 1
fi

# Folder names used as the ${x} path component, listed from sym_data_1/SNPs.
# NOTE(review): this assumes every ${BaseDir}/<entry>/${data} contains the same
# folder names as sym_data_1/SNPs - confirm.
mapfile -t folder_names < <( list_entries "${BaseDir}/sym_data_1/SNPs" )
# Names of the entries directly under BaseDir (the simulated data sets).
mapfile -t sym_names < <( list_entries "${BaseDir}" )
if (( ${#folder_names[@]} == 0 || ${#sym_names[@]} == 0 )); then
  # With an empty list the loops below would do nothing and the job would
  # "succeed" without producing any output.
  echo "nothing to process: ${BaseDir} or ${BaseDir}/sym_data_1/SNPs is empty or missing" >&2
  exit 1
fi

# Record in the job log which file this array task handles.
printf '%s\n' "${file_names}"
########### kraken2
# Classify this task's file with kraken2 for every data set / folder combination.
conda activate kraken2
echo kraken2
database=/storage/users/cferguson/Classification_database/kraken2
# Use the CPU count Slurm allocated to this task instead of a hard-coded 20,
# which oversubscribed the 10 CPUs requested in the #SBATCH header and skewed
# the `time` measurements. Falls back to 10 when run outside Slurm.
threads=${SLURM_CPUS_PER_TASK:-10}
# sym_names / folder_names may be plain whitespace-separated strings, so they
# are expanded unquoted on purpose to let the shell split them into words.
for y in ${sym_names[@]}; do
  report=/storage/users/cferguson/rotation_2_data/classified_data/${y}/${data}/kraken2/report
  out_reads=/storage/users/cferguson/rotation_2_data/classified_data/${y}/${data}/kraken2/reads
  for x in ${folder_names[@]}; do
    echo "${y}"
    echo "${x}"
    file1=${BaseDir}/${y}/${data}/${x}/${file_names}
    # kraken2 cannot write its outputs unless the target directories exist.
    if ! mkdir -p "${report}/${x}" "${out_reads}/${x}"; then
      echo "cannot create output directories for ${y}/${x}; skipping" >&2
      continue
    fi
    # NOTE(review): no --output is given, so the per-read classification lines
    # presumably go to stdout (i.e. the job log) - confirm this is intended.
    time kraken2 --db "${database}" "${file1}" --threads "${threads}" --report "${report}/${x}/${file_names}.out" --classified-out "${out_reads}/${x}/${file_names}.out"
  done
done
conda deactivate
########### kaiju
# Classify this task's file with kaiju for every data set / folder combination.
conda activate kaiju
echo kaiju
database=/storage/users/cferguson/Classification_database/kaiju
# Use the CPU count Slurm allocated to this task instead of a hard-coded 20,
# which oversubscribed the 10 CPUs requested in the #SBATCH header and skewed
# the `time` measurements. Falls back to 10 when run outside Slurm.
threads=${SLURM_CPUS_PER_TASK:-10}
# sym_names / folder_names may be plain whitespace-separated strings, so they
# are expanded unquoted on purpose to let the shell split them into words.
for y in ${sym_names[@]}; do
  # kaiju is only given an output file (-o); no report path is used here.
  out_reads=/storage/users/cferguson/rotation_2_data/classified_data/${y}/${data}/kaiju/reads
  for x in ${folder_names[@]}; do
    echo "${y}"
    echo "${x}"
    file1=${BaseDir}/${y}/${data}/${x}/${file_names}
    # kaiju cannot write its output unless the target directory exists.
    if ! mkdir -p "${out_reads}/${x}"; then
      echo "cannot create output directory for ${y}/${x}; skipping" >&2
      continue
    fi
    time kaiju -z "${threads}" -t "${database}/nodes.dmp" -f "${database}/viruses/kaiju_db_viruses.fmi" -i "${file1}" -o "${out_reads}/${x}/${file_names}.out"
  done
done
conda deactivate
########### centrifuge
# Classify this task's file with centrifuge for every data set / folder combination.
conda activate centrifuge
echo centrifuge
database=/storage/users/cferguson/Classification_database/centrifuge
# Use the CPU count Slurm allocated to this task instead of a hard-coded 20,
# which oversubscribed the 10 CPUs requested in the #SBATCH header and skewed
# the `time` measurements. Falls back to 10 when run outside Slurm.
threads=${SLURM_CPUS_PER_TASK:-10}
# sym_names / folder_names may be plain whitespace-separated strings, so they
# are expanded unquoted on purpose to let the shell split them into words.
for y in ${sym_names[@]}; do
  report=/storage/users/cferguson/rotation_2_data/classified_data/${y}/${data}/centrifuge/report
  out_reads=/storage/users/cferguson/rotation_2_data/classified_data/${y}/${data}/centrifuge/reads
  for x in ${folder_names[@]}; do
    echo "${y}"
    echo "${x}"
    file1=${BaseDir}/${y}/${data}/${x}/${file_names}
    # centrifuge cannot write -S / --report-file unless the directories exist.
    if ! mkdir -p "${report}/${x}" "${out_reads}/${x}"; then
      echo "cannot create output directories for ${y}/${x}; skipping" >&2
      continue
    fi
    time centrifuge -x "${database}/abv" -f "${file1}" -S "${out_reads}/${x}/${file_names}.out" --report-file "${report}/${x}/${file_names}.out" -p "${threads}"
  done
done
conda deactivate