Skip to content

Commit 4f2c79d

Browse files
authored
Fixes (#442)
* export default input for poets to 1060 Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * Remove old files Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * cleanup is moved to the run files Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * Path fixes Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * Fixes to cleanup and dataset fetching Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * Strip spaces from generated output Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * Fix output_time flag Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * Fixed CI/CD bug related to timers Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * switch max-temp.sh to temp-analytics.sh Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * modified execution scripts to support --small flag Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * Fix script names Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * Fixed typo on setup for small inputs Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * Fix on oneliners setup Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * setup fixes Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * Add small setup for poets Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * fixes on web-index install-deps scripts Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * silence the output when downloading the datasets Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * Rename poets benchmark to nlp Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * Fixes to dataset/deps installation Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * fix on poets setup path Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * bugfix on setup installation on max-temp Signed-off-by: DIMITRIS KARNIKIS <[email protected]> * move dgsh repo to binpash org Signed-off-by: Dimitris Karnikis <[email protected]>
1 parent b7300f8 commit 4f2c79d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+749
-716
lines changed

compiler/test_evaluation_scripts.sh

+8-9
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ do
2222
fi
2323
done
2424

25-
microbenchmarks_dir="${PASH_TOP}/evaluation/tests/"
26-
intermediary_dir="${PASH_TOP}/evaluation/tests/test_intermediary/"
27-
test_results_dir="${PASH_TOP}/evaluation/tests/results/"
25+
microbenchmarks_dir="${PASH_TOP}/evaluation/tests"
26+
intermediary_dir="${PASH_TOP}/evaluation/tests/test_intermediary"
27+
test_results_dir="${PASH_TOP}/evaluation/tests/results"
2828
results_time="$test_results_dir/results.time"
2929
results_time_bash=${results_time}_bash
3030
results_time_pash=${results_time}_pash
@@ -106,10 +106,10 @@ execute_pash_and_check_diff() {
106106
{ time "$PASH_TOP/pa.sh" $@ ; } 1> "$pash_output" 2> >(tee -a "${pash_time}" >&2) &&
107107
diff -s "$seq_output" "$pash_output" | head | tee -a "${pash_time}" >&2
108108
else
109-
{ time "$PASH_TOP/pa.sh" $@ ; } 1> "$pash_output" 2>> "${pash_time}" &&
110-
b=$(cat "$pash_time");
111-
c=$(diff -s "$seq_output" "$pash_output" | head)
112-
echo "$c$b" > "${pash_time}"
109+
{ time "$PASH_TOP/pa.sh" $@ ; } 1> "$pash_output" 2> "${pash_time}"
110+
timer=$(cat "$pash_time");
111+
difference=$(diff -s "$seq_output" "$pash_output" | head)
112+
echo "$difference$timer" > "${pash_time}"
113113
fi
114114
}
115115

@@ -171,15 +171,14 @@ execute_tests() {
171171
for conf in "${configurations[@]}"; do
172172
for n_in in "${n_inputs[@]}"; do
173173
echo "|-- Executing with pash --width ${n_in} ${conf}..."
174-
export pash_time="${test_results_dir}/${microbenchmark}_${n_in}_distr_${conf}.time"
174+
export pash_time="${test_results_dir}/${microbenchmark}_${n_in}_distr_$(echo ${conf} | tr -d ' ').time"
175175
export pash_output="${intermediary_dir}/${microbenchmark}_${n_in}_pash_output"
176176
export script_conf=${microbenchmark}_${n_in}
177177
echo '' > "${pash_time}"
178178
# do we need to write the PaSh output ?
179179
cat $stdin_redir |
180180
execute_pash_and_check_diff -d $PASH_LOG $assert_correctness ${conf} --width "${n_in}" --output_time $script_to_execute
181181
tail -n1 "${pash_time}" >> "${results_time_pash}_${n_in}"
182-
183182
done
184183
done
185184
done

compiler/util.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,11 @@ def pad(lst, index):
1919
def print_time_delta(prefix, start_time, end_time, args=None):
2020
## Always output time in the log.
2121
time_difference = (end_time - start_time) / timedelta(milliseconds=1)
22-
log("{} time:".format(prefix), time_difference, " ms")
22+
## If output_time flag is set, log the time
23+
if (config.pash_args.output_time == 1):
24+
log("{} time:".format(prefix), time_difference, " ms", level=0)
25+
else:
26+
log("{} time:".format(prefix), time_difference, " ms")
2327

2428
## This is a wrapper for prints
2529
##

evaluation/benchmarks/analytics-mts/input/setup.sh

+22-9
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,26 @@ if [[ "$1" == "-c" ]]; then
1313
exit
1414
fi
1515

16-
if [ ! -f ./in.csv ]; then
17-
# yesterday=$(date --date='1 days ago' +'%y-%m-%d')
18-
# curl https://www.balab.aueb.gr/~dds/oasa-$yesterday.bz2 |
19-
curl -sf 'https://www.balab.aueb.gr/~dds/oasa-2021-01-08.bz2' | bzip2 -d > in.csv
20-
if [ $? -ne 0 ]; then
21-
echo "oasa-2021-01-08.bz2 / bzip2 not available, contact the pash authors"
22-
exit 1
16+
#######################################
# Fetch the analytics-mts input (in.csv) and derive the reduced input
# (in_small.csv) in the current directory.
# Globals:   PASH_TOP (read)
# Arguments: none
# Outputs:   in.csv and in_small.csv; an error message on stdout on failure
# Returns:   exits the shell with status 1 when the download or the
#            decompression fails
#######################################
setup_dataset() {
  local dataset_url='https://www.balab.aueb.gr/~dds/oasa-2021-01-08.bz2'
  local -a stage_status
  local line_count small_lines

  if [ ! -f ./in.csv ]; then
    # yesterday=$(date --date='1 days ago' +'%y-%m-%d')
    # curl https://www.balab.aueb.gr/~dds/oasa-$yesterday.bz2 |
    curl -sf "${dataset_url}" | bzip2 -d > in.csv
    # $? alone only reports bzip2; inspect every pipeline stage so a failed
    # download is not mistaken for success.
    stage_status=("${PIPESTATUS[@]}")
    if [ "${stage_status[0]}" -ne 0 ] || [ "${stage_status[1]}" -ne 0 ]; then
      # Drop the partial file, otherwise the next run would skip the download.
      rm -f in.csv
      echo "oasa-2021-01-08.bz2 / bzip2 not available, contact the pash authors"
      exit 1
    fi
    "$PASH_TOP/scripts/append_nl_if_not.sh" in.csv
  fi

  # The small input is the first quarter of the full one. It is rebuilt
  # whenever it is missing, even if in.csv was fetched by an earlier run.
  if [ ! -f ./in_small.csv ]; then
    line_count=$(wc -l < in.csv)
    small_lines=$(( line_count / 4 ))
    head -n "${small_lines}" in.csv > in_small.csv
  fi
}
31+
32+
#######################################
# Select the analytics-mts input file and export it as IN.
# Globals:   IN (written, exported)
# Arguments: $1 - "--small" selects the reduced input; any other value
#                 (or no argument) selects the full input
#######################################
source_var() {
  case "$1" in
    --small)
      IN="input/in_small.csv"
      ;;
    *)
      IN="input/in.csv"
      ;;
  esac
  export IN
}

evaluation/benchmarks/for-loops/compress_files.sh evaluation/benchmarks/dependency_untangling/compress_files.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22
# compress all files in a directory
3-
IN=${IN:-$PASH_TOP/evaluation/benchmarks/for-loops/input/pcap_data/}
4-
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/for-loops/input/output/compress}
3+
IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/pcap_data/}
4+
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/compress}
55
LOGS=${OUT}/logs
66
mkdir -p ${OUT}/logs
77
run_tests() {

evaluation/benchmarks/for-loops/encrypt_files.sh evaluation/benchmarks/dependency_untangling/encrypt_files.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22
# encrypt all files in a directory
3-
IN=${IN:-$PASH_TOP/evaluation/benchmarks/for-loops/input/pcap_data}
4-
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/for-loops/input/output/encrypt}
3+
IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/pcap_data}
4+
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/encrypt}
55
LOGS=${OUT}/logs
66
mkdir -p ${LOGS}
77
run_tests() {

evaluation/benchmarks/for-loops/genome.sh evaluation/benchmarks/dependency_untangling/genomics.sh

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
#!/bin/bash
22
# create bam files with regions
33
################### 1KG SAMPLES
4-
IN=${IN:-$PASH_TOP/evaluation/benchmarks/for-loops/input}
5-
SAMTOOLS_BIN=${IN}/samtools-1.7/samtools
6-
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/for-loops/input/output/bio}
4+
IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input}
5+
SAMTOOLS_BIN=${IN}/deps/samtools-1.7/samtools
6+
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/bio}
77
LOGS=${OUT}/logs
8-
IN_NAME=${IN}/100G.txt
9-
GENE_LOCS=${IN}/Gene_locs.txt
8+
IN_NAME=${IN}/bio/100G.txt
9+
GENE_LOCS=${IN}/bio/Gene_locs.txt
1010
mkdir -p ${LOGS}
1111
run_tests() {
1212
s_line=$(echo $1 | tr '@' ' ')
@@ -21,7 +21,7 @@ run_tests() {
2121
# create bai file
2222
$SAMTOOLS_BIN index -b "${OUT}/$sample"_corrected.bam 2> /dev/null
2323
### Isolating each relevant chromosome based on Gen_locs
24-
cut -f 2 ${IN}/Gene_locs.txt |sort |uniq |while read chr;
24+
cut -f 2 ${IN}/bio/Gene_locs.txt |sort |uniq |while read chr;
2525
do
2626
echo 'Isolating Chromosome '$chr' from sample '${OUT}/$sample', ';
2727
$SAMTOOLS_BIN view -b "${OUT}/$sample"_corrected.bam chr"$chr" > "${OUT}/$pop"_"$sample"_"$chr".bam 2> /dev/null

evaluation/benchmarks/for-loops/img_convert.sh evaluation/benchmarks/dependency_untangling/img_convert.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22
# tag: resize image
3-
IN=${JPG:-$PASH_TOP/evaluation/benchmarks/for-loops/input/jpg}
4-
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/for-loops/input/output/jpg}
3+
IN=${JPG:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/jpg}
4+
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/jpg}
55
mkdir -p ${OUT}
66
for i in $IN/*.jpg;
77
do
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Install what the dependency_untangling benchmarks need: system packages
# (via apt), samtools 1.7 built from source, and the makedeb package.
# Downloads and builds are kept under ${IN}/deps.
IN=$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/
mkdir -p "${IN}/deps/"
# install dependencies
pkgs='ffmpeg unrtf imagemagick libarchive-tools zstd liblzma-dev libbz2-dev zip unzip nodejs'

# $pkgs is a space-separated list, so it is expanded unquoted on purpose.
# shellcheck disable=SC2086
if ! dpkg -s $pkgs >/dev/null 2>&1 ; then
  sudo apt-get install $pkgs -y
  echo 'Packages Installed'
fi

if [ ! -d "${IN}/deps/samtools-1.7" ]; then
  # Stop if the directory cannot be entered; otherwise the archives would be
  # downloaded and built wherever the script happened to be started.
  cd "${IN}/deps/" || exit 1
  wget https://github.com/samtools/samtools/archive/refs/tags/1.7.zip
  unzip 1.7.zip
  rm 1.7.zip
  cd samtools-1.7 || exit 1
  # The htslib archive has the same file name as the samtools one, which is
  # why the samtools archive is removed before this download.
  wget https://github.com/samtools/htslib/archive/refs/tags/1.7.zip
  unzip 1.7.zip
  autoheader            # Build config.h.in (this may generate a warning about
                        # AC_CONFIG_SUBDIRS - please ignore it).
  autoconf -Wno-syntax  # Generate the configure script
  ./configure           # Needed for choosing optional functionality
  make
  rm -rf 1.7.zip
  echo 'Samtools installed'
fi

if [ ! -f "${IN}/deps/makedeb.deb" ]; then
  cd "${IN}/deps/" || exit 1
  wget http://pac-n4.csail.mit.edu:81/pash_data/makedeb.deb
  sudo dpkg -i makedeb.deb
  echo 'Makedeb installed'
fi
34+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
#!/bin/bash
#
# Dataset setup for the dependency_untangling benchmarks.
#   -c   remove every generated dataset and output directory, then exit
# Without -c this part only defines the paths used by the functions that
# follow in this file.

# exit when any command fails
#set -e

IN=$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/
OUT=$PASH_TOP/evaluation/benchmarks/dependency_untangling/output/
# The sample list is downloaded into the bio/ subdirectory of the input
# directory, so the path has to point there.
IN_NAME=$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/bio/100G.txt

if [ "$1" == "-c" ]; then
  # Refuse to build deletion paths from an unset PASH_TOP.
  : "${PASH_TOP:?PASH_TOP must be set}"
  for generated in jpg log_data wav nginx-logs node_modules pcap_data pcaps \
    packages mir-sa deps bio output; do
    rm -rf -- "${IN}/${generated}"
  done
  rm -rf -- "${OUT}"
  exit
fi
26+
27+
#######################################
# Download and generate the inputs of the dependency_untangling benchmarks
# under ${IN}: wav, jpg, nginx logs, genome data, pcaps, node modules and the
# package list. A dataset whose directory (or file) already exists is skipped.
# Globals:   IN (read); LOG_DATA_FILES, WAV_DATA_FILES, PCAP_DATA_FILES,
#            NODE_MODULE_LINK, BIO_DATA_LINK, JPG_DATA_LINK (written)
# Arguments: $1 - "--small" selects the reduced datasets; anything else
#                 selects the full ones
# Returns:   1 when a dataset directory cannot be entered
#######################################
setup_dataset() {
  local i f j n sample link bio_list

  if [ "$1" == "--small" ]; then
    LOG_DATA_FILES=6
    WAV_DATA_FILES=20
    NODE_MODULE_LINK=http://pac-n4.csail.mit.edu:81/pash_data/small/node_modules.zip
    BIO_DATA_LINK=http://pac-n4.csail.mit.edu:81/pash_data/small/bio.zip
    JPG_DATA_LINK=http://pac-n4.csail.mit.edu:81/pash_data/small/jpg.zip
    PCAP_DATA_FILES=1
  else
    LOG_DATA_FILES=84
    WAV_DATA_FILES=120
    NODE_MODULE_LINK=http://pac-n4.csail.mit.edu:81/pash_data/full/node_modules.zip
    BIO_DATA_LINK=http://pac-n4.csail.mit.edu:81/pash_data/full/bio.zip
    JPG_DATA_LINK=http://pac-n4.csail.mit.edu:81/pash_data/full/jpg.zip
    PCAP_DATA_FILES=15
  fi

  if [ ! -d "${IN}/wav" ]; then
    mkdir -p "${IN}/wav"
    cd "${IN}/wav" || return 1
    wget https://file-examples-com.github.io/uploads/2017/11/file_example_WAV_1MG.wav
    wget https://file-examples-com.github.io/uploads/2017/11/file_example_WAV_2MG.wav
    wget https://file-examples-com.github.io/uploads/2017/11/file_example_WAV_5MG.wav
    wget https://file-examples-com.github.io/uploads/2017/11/file_example_WAV_10MG.wav
    # The glob is expanded once, so the copies made below are not re-visited.
    for f in *.wav; do
      # Nothing was downloaded: the pattern stayed literal.
      [ -e "$f" ] || continue
      for (( i = 0; i <= WAV_DATA_FILES; i++ )); do
        echo "copying to $f$i.wav"
        cp "$f" "$f$i.wav"
      done
    done
    echo "WAV Generated"
  fi

  if [ ! -d "${IN}/jpg" ]; then
    cd "${IN}" || return 1
    wget "$JPG_DATA_LINK"
    unzip jpg.zip
    echo "JPG Generated"
    rm -rf "${IN}/jpg.zip"
  fi

  # download the input for the nginx logs and populate the dataset
  if [ ! -d "${IN}/log_data" ]; then
    cd "$IN" || return 1
    wget http://pac-n4.csail.mit.edu:81/pash_data/nginx.zip
    unzip nginx.zip
    rm nginx.zip
    # generating analysis logs
    mkdir -p "${IN}/log_data"
    for (( i = 1; i <= LOG_DATA_FILES; i++ )); do
      for j in nginx-logs/*; do
        n=$(basename "$j")
        cat "$j" > "log_data/log${i}_${n}.log"
      done
    done
    echo "Logs Generated"
  fi

  if [ ! -d "${IN}/bio" ]; then
    if [ "$1" = "--small" ]; then
      # Enter the input directory explicitly: earlier steps may have been
      # skipped, leaving the working directory somewhere else.
      cd "${IN}" || return 1
      # download the Genome loc file
      wget "$BIO_DATA_LINK"
      unzip bio.zip
      cd bio || return 1
      wget http://pac-n4.csail.mit.edu:81/pash_data/Gene_locs.txt
      wget http://pac-n4.csail.mit.edu:81/pash_data/small/100G.txt
      cd ..
      rm bio.zip
    else
      mkdir "${IN}/bio"
      cd "${IN}/bio" || return 1
      # download the file containing the links for the dataset
      wget http://pac-n4.csail.mit.edu:81/pash_data/100G.txt
      # download the Genome loc file
      wget http://pac-n4.csail.mit.edu:81/pash_data/Gene_locs.txt
      # start downloading the real dataset
      # The list was just saved in this directory; read it from here.
      bio_list="${IN}/bio/100G.txt"
      # Whitespace-separated fields per line: population, sample, link.
      while read -r _ sample link _ || [[ -n "${sample}" ]]; do
        echo "${bio_list}"
        # Skip malformed lines that lack a sample name or a link.
        if [[ -z "${sample}" || -z "${link}" ]]; then
          continue
        fi
        # Test the file that is actually written (sample.bam), so samples
        # that are already present are not downloaded again.
        if [[ ! -f "${sample}.bam" ]]; then
          wget -O "${sample}.bam" "${link}"   ##this part can be adjusted maybe
        fi
      done < "${bio_list}"
    fi
    echo "Genome data downloaded"
  fi

  # download the initial pcaps to populate the whole dataset
  if [ ! -d "${IN}/pcap_data" ]; then
    cd "$IN" || return 1
    wget http://pac-n4.csail.mit.edu:81/pash_data/pcaps.zip
    unzip pcaps.zip
    rm pcaps.zip
    mkdir "${IN}/pcap_data/"
    # generates 20G
    for (( i = 1; i <= PCAP_DATA_FILES; i++ )); do
      for j in "${IN}"/pcaps/*; do
        n=$(basename "$j")
        cat "$j" > "pcap_data/pcap${i}_${n}"
      done
    done
    echo "Pcaps Generated"
  fi

  # download the modules for the Mir static analyses
  if [ ! -d "${IN}/node_modules" ]; then
    cd "$IN" || return 1
    wget "$NODE_MODULE_LINK"
    unzip node_modules.zip
    rm node_modules.zip
    # download the specific mir version
    wget http://pac-n4.csail.mit.edu:81/pash_data/mir-sa.zip
    unzip mir-sa.zip
    rm mir-sa.zip
    echo "Node modules generated"
  fi

  # download the packages for the package building
  if [ ! -f "${IN}/packages" ]; then
    cd "$IN" || return 1
    wget http://pac-n4.csail.mit.edu:81/pash_data/packages
    if [ "$1" = "--small" ]; then
      # Keep only the first 20 entries of the package list.
      head -n 20 packages > p
      mv p packages
    fi
    echo "Package datset downloaded"
  fi
}
159+
160+
#######################################
# Export IN with an empty value.
# Globals:   IN (written, exported)
# Arguments: none used
#######################################
source_var() {
  IN=""
  export IN
}

evaluation/benchmarks/for-loops/nginx.sh evaluation/benchmarks/dependency_untangling/nginx.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22
# tag: nginx logs
3-
IN=${IN:-$PASH_TOP/evaluation/benchmarks/for-loops/input/log_data}
4-
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/for-loops/input/output/nginx-logs}
3+
IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/log_data}
4+
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/nginx-logs}
55
mkdir -p ${OUT}
66

77
run_tests() {

evaluation/benchmarks/for-loops/pacaur.sh evaluation/benchmarks/dependency_untangling/pacaur.sh

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/bin/bash
2-
IN=${IN:-$PASH_TOP/evaluation/benchmarks/for-loops/input/packages}
3-
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/for-loops/input/output/packages}
2+
IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/packages}
3+
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/packages}
44
LOGS=${OUT}/logs
55
mkdir -p ${OUT} ${LOGS}
66

@@ -38,4 +38,3 @@ do
3838
done
3939

4040
echo 'done';
41-
rm -rf ${OUT}

evaluation/benchmarks/for-loops/pcap.sh evaluation/benchmarks/dependency_untangling/pcap.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22
#tag: pcap analysis
3-
IN=${IN:-$PASH_TOP/evaluation/benchmarks/for-loops/input/pcap_data}
4-
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/for-loops/input/output/pcap-analysis}
3+
IN=${IN:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/pcap_data}
4+
OUT=${OUT:-$PASH_TOP/evaluation/benchmarks/dependency_untangling/input/output/pcap-analysis}
55
LOGS=${OUT}/logs
66
mkdir -p ${LOGS}
77
run_tests() {

0 commit comments

Comments
 (0)