-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01-download-genomes.sh
35 lines (28 loc) · 1.2 KB
/
01-download-genomes.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/bin/bash
#SBATCH --job-name="genomes"
#SBATCH --partition=batch
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=24
#SBATCH --mem-per-cpu=10G
#SBATCH --time=7-00:00:00
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
#SBATCH -e genomes.err-%N
#SBATCH -o genomes.out-%N

# Job setup: load Miniconda, activate the conda environment that provides the
# NCBI `datasets` command, and move into the project directory. Every relative
# path used later in the script is resolved against that directory.
#
# bash is requested explicitly because `source` is not a POSIX sh builtin.
#
# NOTE(review): the `#SBATCH [email protected]` line above looks like a redacted
# `--mail-user=<address>` option -- confirm it before submitting the job.

module load Miniconda3

# Stop early if the environment is unavailable; without it the download
# command used later cannot be found.
source activate /home/ad14556/conda-envs/envs/ncbi_datasets/ || {
    echo "genomes: cannot activate conda environment ncbi_datasets" >&2
    exit 1
}

# Stop early if the project directory is missing, so that files are never
# unpacked, moved, or removed relative to an unexpected working directory.
cd /home/ad14556/pangenome-large || {
    echo "genomes: cannot cd to /home/ad14556/pangenome-large" >&2
    exit 1
}
# Download every genome listed in the metadata table and store its FASTA file
# as data/genomes/<sample id>.fna.
#
# Metadata format: one whitespace-separated record per line. Field 1 is the
# assembly accession passed to `datasets`, field 2 is the sample id used for
# the output file name; any further fields are ignored.
#
# A record that fails is reported on stderr and skipped; the script exits
# non-zero at the end if at least one record failed.
metadata=metadata/pseudoalteromonas-large-dataset.txt
failed=0

if [ ! -r "$metadata" ]; then
    printf 'genomes: cannot read %s\n' "$metadata" >&2
    exit 1
fi
mkdir -p data/genomes || exit 1

# The list is read on file descriptor 3 so that no command inside the loop can
# consume the remaining records from stdin. `read -r` keeps backslashes
# literal, and `|| [ -n "$id" ]` processes a last line with no trailing newline.
while read -r id name rest <&3 || [ -n "$id" ]
do
    # Skip blank lines instead of requesting an empty accession.
    [ -n "$id" ] || continue
    # A record with a single field uses the accession as the sample id.
    [ -n "$name" ] || name=$id

    # Download, unpack, then move and rename the genome FASTA file.
    # NOTE(review): the GCF* pattern only matches file names starting with
    # "GCF" -- confirm that every accession in the metadata produces one.
    if ! {
        datasets download genome accession "$id" --include genome &&
        unzip -o ncbi_dataset.zip &&
        mv ncbi_dataset/data/"$id"/GCF*.fna data/genomes/"$name".fna
    }
    then
        printf 'genomes: failed to fetch %s (sample %s)\n' "$id" "$name" >&2
        failed=$((failed + 1))
    fi
    # mv ncbi_dataset/data/"$id"/cds*.fna data/coding-sequences/"$name"-CDS.fna
    # mv ncbi_dataset/data/"$id"/*.gff data/general-feature-files/"$name".gff
    # mv ncbi_dataset/data/"$id"/*.faa data/proteins/"$name"-PROTEINS.faa

    # Always clear the archive and the unpacked tree so a failed record cannot
    # leave stale files behind for the next one.
    rm -f ncbi_dataset.zip
    rm -rf ncbi_dataset/
done 3< "$metadata"

if [ "$failed" -ne 0 ]; then
    printf 'genomes: %d record(s) could not be downloaded\n' "$failed" >&2
    exit 1
fi