-
Notifications
You must be signed in to change notification settings - Fork 0
/
database_install
executable file
·51 lines (42 loc) · 2.82 KB
/
database_install
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/bin/bash
# Interactive installer for the varwolf reference databases.
# Asks for confirmation first because the download is large (see prompt).
#
# Exit status of this section:
#   0 - the user declined (n/N)
#   1 - the answer was not recognised (including empty input / EOF)
set -euo pipefail

# -r keeps backslashes in the reply literal. `|| true` stops `set -e` from
# aborting silently when stdin hits EOF; an empty or partial reply is then
# judged by the case statement below.
read -r -p "This script will download around 40Gb of data, make sure you are running it using <docker run --rm -itv /path/to/varwolf/:/root/varwolf/ varwolf:latest database_install> to have the files saved locally for future use. Continue (y/n)?" CHOICE || true
case "$CHOICE" in
  y|Y )
    echo "Downloading..."
    ;;
  n|N )
    exit 0
    ;;
  * )
    # Diagnostics go to stderr, and a non-zero status lets callers tell an
    # unrecognised answer apart from a deliberate "no".
    echo "invalid" >&2
    exit 1
    ;;
esac
# Get GATK resource bundle files
# Downloads the hg38 files listed below into /root/varwolf/db/refs/, then
# bgzip-compresses and tabix-indexes the dbSNP VCF, which is the only file
# fetched uncompressed.
refs_dir=/root/varwolf/db/refs
bundle_url=https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0

# Kept as two statements on purpose: in `mkdir ... && cd ...` a failing mkdir
# is exempt from `set -e`, so the script would carry on and download into
# whatever the current directory happens to be.
mkdir -p "$refs_dir"
cd "$refs_dir"

bundle_files=(
  Homo_sapiens_assembly38.dict
  Homo_sapiens_assembly38.fasta
  Homo_sapiens_assembly38.fasta.64.alt
  Homo_sapiens_assembly38.fasta.64.amb
  Homo_sapiens_assembly38.fasta.64.ann
  Homo_sapiens_assembly38.fasta.64.bwt
  Homo_sapiens_assembly38.fasta.64.pac
  Homo_sapiens_assembly38.fasta.64.sa
  Homo_sapiens_assembly38.fasta.fai
  Homo_sapiens_assembly38.known_indels.vcf.gz
  Homo_sapiens_assembly38.known_indels.vcf.gz.tbi
  Mills_and_1000G_gold_standard.indels.hg38.vcf.gz
  Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi
  hapmap_3.3.hg38.vcf.gz
  hapmap_3.3.hg38.vcf.gz.tbi
)

# -c resumes a partially downloaded file and leaves a complete one alone, so
# an interrupted run can be restarted without creating ".1" duplicates.
for bundle_file in "${bundle_files[@]}"; do
  wget -c "${bundle_url}/${bundle_file}"
done

# bgzip replaces the .vcf with a .vcf.gz, so on a re-run the plain VCF is
# gone. Skip the whole step once the index exists instead of downloading
# the uncompressed file again.
dbsnp_vcf=Homo_sapiens_assembly38.dbsnp138.vcf
if [[ ! -s "${dbsnp_vcf}.gz.tbi" ]]; then
  wget -c "${bundle_url}/${dbsnp_vcf}"
  # -f: overwrite leftovers from an earlier, interrupted attempt.
  bgzip -f "$dbsnp_vcf"
  tabix -f "${dbsnp_vcf}.gz"
fi
# Download vep cache and custom files
# Runs the ensembl-vep installer with /root/varwolf/db/vep_files/ as its
# cache directory, then fetches and unpacks the custom_files archive from
# Google Drive into the same directory.
vep_dir=/root/varwolf/db/vep_files/

# Two statements rather than `mkdir ... && cd ...`: a failure on the left
# side of `&&` does not trigger `set -e`, and the steps below must not run
# in the wrong directory.
mkdir -p "$vep_dir"
cd "$vep_dir"

perl /root/ensembl-vep/INSTALL.pl \
  -a c \
  -s Homo_sapiens_refseq \
  -y GRCh38 \
  -c "$vep_dir"

# apt-get instead of apt: it is the front end intended for scripts. Refresh
# the package index first and pass -y so the install never waits for input.
apt-get update
apt-get install -y pipx
pipx install gdown
PATH="/root/.local/bin/:${PATH}"

# `gdown --id` is deprecated (and reportedly rejected by newer releases);
# the URL form avoids depending on it.
# -O pins the file name that the tar/rm steps below rely on.
gdown -O custom_files.tar.gz "https://drive.google.com/uc?id=1L_YQ7lXKMLd5n_jHsSQAEobZdQ-ZvU6Y"
tar -xvf custom_files.tar.gz
rm custom_files.tar.gz

mkdir -p /root/varwolf/input/