forked from statgenetics/seqspark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinstall
executable file
·174 lines (146 loc) · 4.66 KB
/
install
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/usr/bin/perl
use strict;
use warnings;
use Cwd qw(abs_path cwd);
use File::Basename;
use Getopt::Long;
# Parse and validate the installer's command-line options.
#
# Reads @ARGV (through Getopt::Long), $0, the current directory and %ENV.
# Prints the usage text and exits when called without any argument or with
# --help/-h.
#
# Returns the list ($prefix, $set_conf, $db_dir, $download):
#   $prefix   - absolute path of the install directory
#   $set_conf - 1 to write a fresh conf/reference.conf, 0 to keep the one
#               already present under $prefix
#   $db_dir   - HDFS directory for the database files
#   $download - 1 to download the database files, 0 to skip the download
#
# Dies when the options cannot be parsed, when $prefix is the source
# directory, when the script is not run from the source directory, or when
# no db-dir was given and the user name cannot be determined.
sub check_args {
    # Set up defaults
    my $prefix   = $ENV{HOME} . "/seqspark";
    my $help     = 0;
    my $db_dir   = '';
    my $download = 1;
    my $set_conf = 1;

    # If there is no argument at all, print help.
    if (scalar @ARGV == 0) {
        help();
    }

    # Get options. A mistyped option must stop the installation instead of
    # silently falling back to the defaults.
    GetOptions(
        'prefix=s'  => \$prefix,
        'db-dir=s'  => \$db_dir,
        'download!' => \$download,
        'h|help!'   => \$help
    ) or die "Invalid command line options, run ./install --help for usage.\n";

    # If help, print help.
    if ($help) {
        help();
    }

    # abs_path() may return undef when parent directories of the prefix do
    # not exist yet; fall back to a purely lexical absolute path so that
    # "mkdir -p" can still create it later.
    my $resolved = abs_path($prefix);
    if (!defined $resolved) {
        require File::Spec;
        $resolved = File::Spec->rel2abs($prefix);
    }
    $prefix = $resolved;

    my $src_dir = abs_path(dirname($0));
    if ($prefix eq $src_dir) {
        die "Please don't install seqspark into the source code directory. Exit without installation.";
    }
    if ($src_dir ne abs_path(cwd())) {
        warn "$src_dir," . cwd();
        die "Please run this install script in the source code directory. Exit without installation.";
    }

    if (-f "$prefix/conf/reference.conf") {
        # An existing reference.conf marks a previous installation: keep its
        # db-dir setting and do not download the databases again.
        print STDERR "Old version detected, won't change db-dir setting.\n";
        $set_conf = 0;
        $download = 0;
    } elsif ($db_dir eq "") {
        # Default to the user's HDFS home, as advertised in the usage text.
        my $user = $ENV{USER};
        if (!defined $user || $user eq '') {
            $user = getpwuid($<);
        }
        if (!defined $user || $user eq '') {
            die "Cannot determine the user name, please specify --db-dir.\n";
        }
        $db_dir = "/user/$user/seqspark";
        print STDERR "No db-dir specified, will use $db_dir.\n";
    } elsif (! $download) {
        print STDERR "Will install into $prefix, database files won't be downloaded.\n";
    } else {
        print STDERR "Will install into $prefix, database files will be downloaded to $db_dir\n";
    }

    return ($prefix, $set_conf, $db_dir, $download);
}
# Build the seqspark assembly jar with sbt 0.13.16.
#
# The combined stdout/stderr of sbt is shown on the terminal and saved to
# build.log in the current directory. Takes no arguments.
#
# Dies when bash cannot be started, when the build is killed by a signal,
# or when the build pipeline exits with a non-zero status.
sub build {
    print STDERR "Building seqspark ...\n";

    # Without pipefail the status of the pipeline is the status of `tee`,
    # which would hide a failing sbt run.
    my $status = system("bash", "-c",
        "set -o pipefail; sbt -sbt-version 0.13.16 assembly 2>&1 | tee build.log");

    if ($status == -1) {
        # Only in this case does $! describe the failure.
        die "Failed to build jar: cannot run bash: $!\n";
    }
    if ($status & 127) {
        die sprintf("Failed to build jar: build killed by signal %d\n", $status & 127);
    }
    if ($status != 0) {
        die sprintf("Failed to build jar: build exited with status %d, see build.log\n", $status >> 8);
    }

    print STDERR "Finished building seqspark.\n";
}
# Copy the built jar, the scripts and (optionally) the configuration into
# the install directory. Must be run from the source directory.
#
# Parameters:
#   $prefix   - install directory; created when missing
#   $set_conf - true to write $prefix/conf (log4j.properties and a
#               reference.conf with dbDir filled in); false to leave any
#               existing configuration untouched
#   $db_dir   - value written as "dbDir" into reference.conf
#
# Dies when the jar name cannot be found in build.sbt, when the jar is
# missing under target/, or when any copy/create/write step fails.
sub install {
    my ($prefix, $set_conf, $db_dir) = @_;

    open my $fh, '<', "build.sbt" or die "Failed to open build.sbt: $!\n";
    my $jar;
    my $scalaVersion = "2.11";
    while (my $line = <$fh>) {
        chomp $line;
        if ($line =~ m/(SeqSpark.*\.jar)/) {
            # Scanning stops at the jar name, so a scalaVersion setting is
            # only honoured when it appears before this line.
            $jar = $1;
            last;
        }
        if ($line =~ m/scalaVersion.+?(2\.1[01])\./) {
            $scalaVersion = $1;
        }
    }
    close $fh;

    # Check the jar name before using it to build the target path.
    if (! $jar) {
        die "Cannot get the jar file name from build.sbt";
    }
    my $target = "target/scala-$scalaVersion/$jar";
    if (! -f $target) {
        die "Cannot find $target, compiling failed?";
    }

    # List-form system() bypasses the shell, so a prefix containing spaces
    # or shell metacharacters is passed through unchanged.
    my $scripts = "bin";
    !system("mkdir", "-p", $prefix) or die "Failed to create folder: $prefix\n";
    !system("cp", $target, $prefix) or die "Failed to copy jar file to $prefix\n";
    !system("cp", "-r", $scripts, $prefix) or die "Failed to copy scripts to $prefix\n";

    ## prepare conf
    if (! $set_conf) {
        return;
    }
    !system("mkdir", "-p", "$prefix/conf") or die "Failed to create folder: $prefix/conf\n";
    !system("cp", "conf/log4j.properties", "$prefix/conf/")
        or die "Failed to copy log4j.properties to $prefix/conf/\n";

    open my $i_conf, '<', "conf/reference.conf"
        or die "Failed to open conf/reference.conf: $!\n";
    open my $o_conf, '>', "$prefix/conf/reference.conf"
        or die "Failed to open $prefix/conf/reference.conf for write: $!\n";
    while (my $line = <$i_conf>) {
        if ($line =~ m/dbDir_unset/) {
            # Replace the placeholder line with the real database directory.
            printf {$o_conf} "dbDir = %s\n", $db_dir;
        } else {
            print {$o_conf} $line;
        }
    }
    close $i_conf;
    # Buffered write errors only surface at close time.
    close $o_conf or die "Failed to write $prefix/conf/reference.conf: $!\n";
    return;
}
# Download the reference databases into an HDFS directory.
#
# Parameters:
#   $db_dir   - HDFS directory that receives the database files
#   $download - false to skip the download entirely
#
# Files already present in $db_dir are skipped. Dies when $db_dir is empty,
# when the directory cannot be created on HDFS, or when a download fails.
sub download_dbs {
    my ($db_dir, $download) = @_;
    if (! $download) {
        print STDERR "No need to download database files.\n";
        return;
    }

    # Validate the path before any HDFS command is run with it.
    if (!defined $db_dir || $db_dir eq '') {
        die "You didn't specify a valid HDFS path\n";
    }

    !system("hdfs", "dfs", "-mkdir", "-p", $db_dir)
        or die "Cannot create dir $db_dir on HDFS, you need to mannually download databases later\n";

    my @db = qw/refFlat_table refGene_seq dbSNP-138.vcf.bz2/;
    print STDERR "going to download refseq and dbsnp to $db_dir\n";
    for my $db (@db) {
        my $dest = "$db_dir/$db";

        # "hdfs dfs -test -f" exits with 0 when the file exists.
        if (system("hdfs", "dfs", "-test", "-f", $dest) == 0) {
            print STDERR "database $db already exists, skip downloading\n";
            next;
        }

        # pipefail and "curl -f" make a failed transfer or an HTTP error
        # visible; the URL and destination are passed as positional
        # arguments so the shell never re-parses them.
        my $url    = "http://seqspark.statgen.us/$db";
        my $status = system("bash", "-c",
            'set -o pipefail; curl -f "$1" | hdfs dfs -appendToFile - "$2"',
            "bash", $url, $dest);
        if ($status != 0) {
            # Best effort: drop the partial file, otherwise the next run
            # would consider this database already downloaded.
            system("hdfs", "dfs", "-rm", "-f", $dest);
            die "Failed to download\n";
        }
    }
    return;
}
# Print the usage text to STDOUT and terminate the script with exit
# status 0. Never returns to the caller.
sub help {
    # Show the real defaults; fall back to placeholders when the
    # environment does not provide HOME or USER.
    my $home = defined $ENV{HOME} ? $ENV{HOME} : '~';
    my $user = defined $ENV{USER} ? $ENV{USER} : '<user>';

    print <<"END_USAGE";
Usage: ./install [options]
Options:
--help,-h Show this help message
--prefix=[dir] Set install directory. [$home/seqspark]
--db-dir=[dir] HDFS path to store the database files. [/user/$user/seqspark]
--download|no-download download the database files or not. [download]
Examples:
Fresh install:
./install --prefix ~/software/seqspark --db-dir ref/seqspark
Install an updated version into the same prefix and db-dir:
./install --prefix /to/your/old/prefix
Install without downloading databases:
./install --prefix ~/seqspark --db-dir ref/seqspark --no-download
END_USAGE
    exit;
}
# Entry point of the installer: validate the command line, build the jar,
# copy everything into place and finally fetch the database files.
sub main {
    # Validate the command line and collect the settings.
    my @settings = check_args();
    my ($prefix, $set_conf, $db_dir, $download) = @settings;

    # Compile seqspark with sbt.
    build();

    # Put the jar, scripts and configuration under the prefix, then fetch
    # the database files when requested.
    install($prefix, $set_conf, $db_dir);
    download_dbs($db_dir, $download);
}

main();