@@ -7,77 +7,175 @@ IN=$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/
7
7
OUT=$PASH_TOP /evaluation/benchmarks/dependency_untangling/output/
8
8
IN_NAME=$PASH_TOP /evaluation/benchmarks/dependency_untangling/input/100G.txt
9
9
10
+ # # Import the build library
11
+ . " $IN /build_lib.sh"
12
+
10
13
## Cleanup mode: when the first argument is -c, remove every generated dataset
## directory under ${IN} and the whole ${OUT} directory, then stop the script.
if [ "$1" == "-c" ]; then
  for dataset in jpg log_data wav nginx-logs node_modules pcap_data pcaps \
    packages mir-sa deps bio output; do
    rm -rf "${IN}/${dataset}"
  done
  rm -rf "${OUT}"
  exit
fi
26
29
30
+
31
+
32
+ # # Q: Can these checks be generated automatically? This would be great if
33
+ # # the user just ran the command, and then if it succeeded, the test is generated.
34
## Done-check for wav_step_1.
## Passes the four sample WAV paths, in their renamed ".kernel" form, to
## files_exist_done_check and returns that helper's exit status unchanged.
wav_step_1_done_check() {
  local stem="wav/file_example_WAV"
  local -a expected=()
  local size
  for size in 1MG 2MG 5MG 10MG; do
    expected+=("${stem}_${size}.wav.kernel")
  done
  files_exist_done_check "${expected[@]}"
}
40
+
41
## Q: Can we automatically check that this is idempotent?
## For example, this step had 2 non-idempotence issues:
## - wget writes a numbered copy (wav.zip.1, wav.zip.2, ...) when wav.zip
##   already exists, so the download has to go to a fixed output file
##   (done here with curl -o)
## - wav files need to be saved with a .kernel suffix to make step 2 idempotent
##
## wav_step_1: download wav.zip into the current directory, unpack it, and
## rename the four sample files to "<name>.wav.kernel".
## Returns: the failing status if unzip or a rename fails, 0 otherwise.
wav_step_1() {
  local prefix="wav/file_example_WAV"
  local f

  # curl's status is not treated as fatal (same as before); the unzip below is
  # what decides whether the archive is usable.
  curl -C - -o wav.zip http://pac-n4.csail.mit.edu:81/pash_data/wav.zip

  # -o overwrites existing files without prompting, so a re-run never blocks
  # waiting for an answer on stdin.
  unzip -o wav.zip || return

  ## Necessary so that the iteration in step 2 is idempotent
  for f in "${prefix}_1MG.wav" "${prefix}_2MG.wav" "${prefix}_5MG.wav" "${prefix}_10MG.wav"; do
    # A file missing from the archive is skipped here; the done-check is the
    # place that reports it.
    [[ -e "$f" ]] || continue
    mv -f -- "$f" "$f.kernel" || return
  done
}
export -f wav_step_1_done_check
export -f wav_step_1
57
+
58
## Done-check for wav_step_2.
## Globals:  WAV_DATA_FILES (read) - highest copy index, inclusive.
## Outputs:  prints "Done" on stdout when every check passed.
## Returns:  0 when files_exist_done_check succeeded for all indices
##           0..WAV_DATA_FILES; 1 on the first failing index or when
##           WAV_DATA_FILES is not a non-negative integer.
wav_step_2_done_check() {
  local prefix="wav/file_example_WAV"
  local -i i last

  # Without this guard an unset/empty WAV_DATA_FILES would skip the loop
  # entirely and report the step as done.
  if ! [[ "${WAV_DATA_FILES:-}" =~ ^[0-9]+$ ]]; then
    echo "wav_step_2_done_check: WAV_DATA_FILES must be a non-negative integer" >&2
    return 1
  fi
  # Force base 10 so a value such as 08 is not parsed as octal.
  last=$((10#${WAV_DATA_FILES}))

  for (( i = 0; i <= last; i++ )); do
    if ! files_exist_done_check "${prefix}_1MG.wav${i}.wav" "${prefix}_2MG.wav${i}.wav" "${prefix}_5MG.wav${i}.wav" "${prefix}_10MG.wav${i}.wav"; then
      return 1
    fi
  done
  echo "Done"
  return 0
}
69
+
70
## wav_step_2: for every wav/*.kernel file, create copies named
## "<name without .kernel><i>.wav" for i = 0..WAV_DATA_FILES (inclusive).
## Globals:  WAV_DATA_FILES (read) - highest copy index.
## Outputs:  one "copying to <target>" line per copy on stdout.
## Returns:  1 when WAV_DATA_FILES is not a non-negative integer, the cp
##           status if a copy fails, 0 otherwise.
wav_step_2() {
  local kernel base
  local -i i last

  if ! [[ "${WAV_DATA_FILES:-}" =~ ^[0-9]+$ ]]; then
    echo "wav_step_2: WAV_DATA_FILES must be a non-negative integer" >&2
    return 1
  fi
  # Force base 10 so a value such as 08 is not parsed as octal.
  last=$((10#${WAV_DATA_FILES}))

  for kernel in wav/*.kernel; do
    # An unmatched glob stays literal; skip it instead of copying nothing.
    [[ -e "$kernel" ]] || continue
    # Compute the target stem once, before it is used in the message.
    base=${kernel%.kernel}
    for (( i = 0; i <= last; i++ )); do
      echo copying to "${base}${i}.wav"
      cp -- "$kernel" "${base}${i}.wav" || return
    done
  done
}
export -f wav_step_2_done_check
export -f wav_step_2
82
+
83
+
84
## jpg_step: download the JPG archive into the current directory as jpg.zip,
## unpack it, and delete the archive once extraction succeeded.
## Globals:  JPG_DATA_LINK (read) - URL of the archive.
## Returns:  unzip's status when extraction fails (jpg.zip is then kept so
##           curl -C - can resume it on the next run), otherwise rm's status.
jpg_step() {
  # curl's status is not treated as fatal (same as before); unzip decides
  # whether the archive is usable.
  curl -C - -o jpg.zip "$JPG_DATA_LINK"

  # -o overwrites existing files without prompting, so a re-run never blocks.
  unzip -o jpg.zip || return

  # Remove the archive from where curl wrote it (the current directory).
  rm -f -- jpg.zip
}
90
+
91
## Done-check for jpg_step.
## Calls number_of_files_in_dir with JPG_NUMBER and the directory name "jpg"
## and returns its status. JPG_NUMBER is quoted so that an empty value is still
## passed as the first argument instead of letting "jpg" shift into its place.
## NOTE(review): presumably the helper compares the file count of "jpg"
## against JPG_NUMBER - confirm in build_lib.sh.
jpg_step_done_check() {
  number_of_files_in_dir "${JPG_NUMBER}" jpg
}
export -f jpg_step
export -f jpg_step_done_check
97
+
98
## nginx_logs_step_1: download nginx.zip into the current directory, unpack it,
## and delete the archive once extraction succeeded.
## Returns:  unzip's status when extraction fails (nginx.zip is then kept so
##           curl -C - can resume it on the next run), otherwise rm's status.
nginx_logs_step_1() {
  # curl's status is not treated as fatal (same as before); unzip decides
  # whether the archive is usable.
  curl -C - -o nginx.zip http://pac-n4.csail.mit.edu:81/pash_data/nginx.zip

  # -o overwrites existing files without prompting, so a re-run never blocks.
  unzip -o nginx.zip || return

  rm -f -- nginx.zip
}
104
+
105
## Done-check for nginx_logs_step_1.
## Calls files_exist_done_check once per path nginx-logs/log0 .. nginx-logs/log7
## and returns 1 as soon as one call fails; returns 0 when all eight succeed.
nginx_logs_step_1_done_check() {
  local prefix="nginx-logs/log"
  local idx=0
  while (( idx <= 7 )); do
    files_exist_done_check "${prefix}${idx}" || return 1
    idx=$(( idx + 1 ))
  done
  return 0
}

export -f nginx_logs_step_1
export -f nginx_logs_step_1_done_check
118
+
119
## nginx_logs_step_2: generate the analysis logs by copying every file in
## nginx-logs/ to log_data/log<i>_<name>.log for i = 1..LOG_DATA_FILES.
## Globals:  LOG_DATA_FILES (read) - number of copies per source log.
## Returns:  1 when LOG_DATA_FILES is not a non-negative integer, the failing
##           status if mkdir or a copy fails, 0 otherwise.
nginx_logs_step_2() {
  local src name
  local -i i copies

  if ! [[ "${LOG_DATA_FILES:-}" =~ ^[0-9]+$ ]]; then
    echo "nginx_logs_step_2: LOG_DATA_FILES must be a non-negative integer" >&2
    return 1
  fi
  # Force base 10 so a value such as 08 is not parsed as octal.
  copies=$((10#${LOG_DATA_FILES}))

  # generating analysis logs
  # Create the directory at the same relative path the copies are written to;
  # sources and targets are all resolved against the current directory.
  mkdir -p log_data || return
  for (( i = 1; i <= copies; i++ )); do
    for src in nginx-logs/*; do
      # An unmatched glob stays literal; skip it.
      [[ -e "$src" ]] || continue
      name=${src##*/}
      cp -- "$src" "log_data/log${i}_${name}.log" || return
    done
  done
}
130
+
131
+
132
## Done-check for nginx_logs_step_2.
## Globals:  LOG_DATA_FILES (read) - number of copies per source log.
## Outputs:  prints "Done" on stdout when every check passed.
## Returns:  0 when files_exist_done_check succeeded for
##           log_data/log<i>_log<j>.log with j = 0..7 and
##           i = 1..LOG_DATA_FILES; 1 on the first failure or when
##           LOG_DATA_FILES is not a non-negative integer.
nginx_logs_step_2_done_check() {
  local prefix="log_data/log"
  local -i i j copies

  # Without this guard an unset/empty LOG_DATA_FILES would skip the inner loop
  # entirely and report the step as done.
  if ! [[ "${LOG_DATA_FILES:-}" =~ ^[0-9]+$ ]]; then
    echo "nginx_logs_step_2_done_check: LOG_DATA_FILES must be a non-negative integer" >&2
    return 1
  fi
  # Force base 10 so a value such as 08 is not parsed as octal.
  copies=$((10#${LOG_DATA_FILES}))

  for (( j = 0; j <= 7; j++ )); do
    for (( i = 1; i <= copies; i++ )); do
      if ! files_exist_done_check "${prefix}${i}_log${j}.log"; then
        return 1
      fi
    done
  done
  echo "Done"
  return 0
}

export -f nginx_logs_step_2_done_check
export -f nginx_logs_step_2
148
+
149
+
27
150
setup_dataset () {
28
151
if [ " $1 " == " --small" ]; then
29
- LOG_DATA_FILES=6
30
- WAV_DATA_FILES=20
152
+ export LOG_DATA_FILES=6
153
+ export WAV_DATA_FILES=20
31
154
NODE_MODULE_LINK=http://pac-n4.csail.mit.edu:81/pash_data/small/node_modules.zip
32
155
BIO_DATA_LINK=http://pac-n4.csail.mit.edu:81/pash_data/small/bio.zip
33
- JPG_DATA_LINK=http://pac-n4.csail.mit.edu:81/pash_data/small/jpg.zip
156
+ export JPG_DATA_LINK=http://pac-n4.csail.mit.edu:81/pash_data/small/jpg.zip
157
+ export JPG_NUMBER=508
34
158
PCAP_DATA_FILES=1
35
159
else
36
- LOG_DATA_FILES=84
37
- WAV_DATA_FILES=120
160
+ export LOG_DATA_FILES=84
161
+ export WAV_DATA_FILES=120
38
162
NODE_MODULE_LINK=http://pac-n4.csail.mit.edu:81/pash_data/full/node_modules.zip
39
163
BIO_DATA_LINK=http://pac-n4.csail.mit.edu:81/pash_data/full/bio.zip
40
- JPG_DATA_LINK=http://pac-n4.csail.mit.edu:81/pash_data/full/jpg.zip
164
+ export JPG_DATA_LINK=http://pac-n4.csail.mit.edu:81/pash_data/full/jpg.zip
165
+ export JPG_NUMBER=1624
41
166
PCAP_DATA_FILES=15
42
167
fi
43
168
44
- if [ ! -d ${IN} /wav ]; then
45
- wget http://pac-n4.csail.mit.edu:81/pash_data/wav.zip
46
- unzip wav.zip && cd wav/
47
- for f in * .wav; do
48
- FILE=$( basename " $f " )
49
- for (( i = 0 ; i <= $WAV_DATA_FILES ; i++ )) do
50
- echo copying to $f$i .wav
51
- cp $f $f$i .wav
52
- done
53
- done
54
- echo " WAV Generated"
55
- fi
56
-
57
- if [ ! -d ${IN} /jpg ]; then
58
- cd ${IN}
59
- wget $JPG_DATA_LINK
60
- unzip jpg.zip
61
- echo " JPG Generated"
62
- rm -rf ${IN} /jpg.zip
63
- fi
169
+ # # WAV
170
+ execute_step wav_step_1 wav_step_1_done_check " WAV zip download"
171
+ execute_step wav_step_2 wav_step_2_done_check " WAV file generation"
172
+
173
+ # # JPG
174
+ execute_step jpg_step jpg_step_done_check " JPG Downloading"
64
175
65
- # download the input for the nginx logs and populate the dataset
66
- if [ ! -d ${IN} /log_data ]; then
67
- cd $IN
68
- wget http://pac-n4.csail.mit.edu:81/pash_data/nginx.zip
69
- unzip nginx.zip
70
- rm nginx.zip
71
- # generating analysis logs
72
- mkdir -p ${IN} /log_data
73
- for (( i = 1 ; i <= $LOG_DATA_FILES ; i++ )) do
74
- for j in nginx-logs/* ; do
75
- n=$( basename $j )
76
- cat $j > log_data/log${i} _${n} .log;
77
- done
78
- done
79
- echo " Logs Generated"
80
- fi
176
+ # # nginx logs
177
+ execute_step nginx_logs_step_1 nginx_logs_step_1_done_check " NGINX logs Downloading"
178
+ execute_step nginx_logs_step_2 nginx_logs_step_2_done_check " NGINX logs generated"
81
179
82
180
if [ ! -d ${IN} /bio ]; then
83
181
if [ " $1 " = " --small" ]; then
0 commit comments