Skip to content

Commit cc49ac9

Browse files
committed
First attempt on cleaning up evaluation
1 parent c6513c1 commit cc49ac9

File tree

2,223 files changed

+253036
-132643
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,223 files changed

+253036
-132643
lines changed
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
./oasa-2021-01-08.bz2
2-
./input.csv
2+
./in.csv
33
./out
44
./out1

evaluation/buses/1.sh evaluation/benchmarks/analytics-mts/1.sh

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
#!/bin/bash
2-
# This script is part of a study on OASA's Telematics
3-
# Diomidis Spinellis and Eleftheria Tsaliki
4-
# https://insidestory.gr/article/noymera-leoforeia-athinas
2+
# Vehicles on the road per day
53

6-
# # Vehicles on the road per day
74
# <in.csv sed 's/T..:..:..//' |
85
# awk -F, '!seen[$1 $3] {onroad[$1]++; seen[$1 $3] = 1}
96
# END { OFS = "\t"; for (d in onroad) print d, onroad[d]}' |

evaluation/buses/2.sh evaluation/benchmarks/analytics-mts/2.sh

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
#!/bin/bash
2-
# This script is part of a study on OASA's Telematics
3-
# Diomidis Spinellis and Eleftheria Tsaliki
4-
# https://insidestory.gr/article/noymera-leoforeia-athinas
2+
# Days a vehicle is on the road
53

6-
# # Days a vehicle is on the road
74
# <in.csv sed 's/T..:..:..//' |
85
# awk -F, '!seen[$1 $3] {onroad[$3]++; seen[$1 $3] = 1}
96
# END { OFS = "\t"; for (d in onroad) print d, onroad[d]}' |

evaluation/buses/3.sh evaluation/benchmarks/analytics-mts/3.sh

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
#!/bin/bash
2-
# This script is part of a study on OASA's Telematics
3-
# Diomidis Spinellis and Eleftheria Tsaliki
4-
# https://insidestory.gr/article/noymera-leoforeia-athinas
2+
# Hours each vehicle is on the road
53

6-
# # Hours each vehicle is on the road
74
# <in.csv sed 's/T\(..\):..:../,\1/' |
85
# awk -F, '!seen[$1 $2 $4] {onroad[$4]++; seen[$1 $2 $4] = 1}
96
# END { OFS = "\t"; for (d in onroad) print d, onroad[d]}' |

evaluation/buses/4.sh evaluation/benchmarks/analytics-mts/4.sh

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
#!/bin/bash
2-
# This script is part of a study on OASA's Telematics
3-
# Diomidis Spinellis and Eleftheria Tsaliki
4-
# https://insidestory.gr/article/noymera-leoforeia-athinas
2+
# Hours monitored each day
53

6-
# # Hours monitored each day
74
# <in.csv sed 's/T\(..\):..:../,\1/' |
85
# awk -F, '!seen[$1 $2] {hours[$1]++; seen[$1 $2] = 1}
96
# END { OFS = "\t"; for (d in hours) print d, hours[d]}' |

evaluation/buses/5.sh evaluation/benchmarks/analytics-mts/5.sh

-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
11
#!/bin/bash
2-
# This script is part of a study on OASA's Telematics
3-
# Diomidis Spinellis and Eleftheria Tsaliki
4-
# https://insidestory.gr/article/noymera-leoforeia-athinas
5-
62
# Hours each bus is active each day
73

84
# Records are day, hour, line, bus
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Fetch one day of OASA (Athens bus telematics) position data and
# decompress it into in.csv for the analytics-mts benchmarks.
# To fetch the previous day's data instead, use:
# curl https://www.balab.aueb.gr/~dds/oasa-$(date --date='1 days ago' +'%y-%m-%d').bz2 |
curl https://www.balab.aueb.gr/~dds/oasa-2021-01-08.bz2 |
bzip2 -d > in.csv
File renamed without changes.
File renamed without changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.txt
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
#!/bin/bash
# Auxiliary functions for bi-grams

# Emit the stream of consecutive-line pairs (bi-grams) of stdin:
# each output line is "line_i<TAB>line_(i+1)".
# Implementation: tee the input into a FIFO, drop the first line of the
# main copy, and paste the two copies so each line is paired with its
# successor; `sed '$d'` drops the final incomplete pair.
bigrams_aux()
{
    s2=$(mktemp -u)   # unique pathname only; the node itself is made by mkfifo
    mkfifo "$s2"
    tee "$s2" |
        tail -n +2 |
        paste "$s2" - |
        sed '$d'
    rm "$s2"
}

# Map phase of the distributed bi-gram computation.
#   $1 IN        input stream
#   $2 OUT       receives the bi-gram pairs of IN
#   $3 AUX_HEAD  receives the first line of IN
#   $4 AUX_TAIL  receives the last line of IN
# The head/tail streams let a reduce step later synthesize the bi-gram
# that spans the boundary between two adjacent input chunks.
bigram_aux_map()
{
    IN=$1
    OUT=$2
    AUX_HEAD=$3
    AUX_TAIL=$4

    s2=$(mktemp -u)
    aux1=$(mktemp -u)
    aux2=$(mktemp -u)
    aux3=$(mktemp -u)
    temp=$(mktemp -u)

    # s2/aux1/aux2 are only needed by the commented-out streaming variant
    # below; they are still created (and removed) to keep behavior identical.
    mkfifo "$s2"
    mkfifo "$aux1"
    mkfifo "$aux2"
    mkfifo "$aux3"

    ## New way of doing it using an intermediate file. This is slow
    ## but doesn't deadlock
    cat "$IN" > "$temp"

    sed '$d' "$temp" > "$aux3" &
    head -n 1 "$temp" > "$AUX_HEAD" &
    tail -n 1 "$temp" > "$AUX_TAIL" &
    tail -n +2 "$temp" | paste "$aux3" - > "$OUT" &

    # ## Old way of doing it (fully streaming, but could deadlock):
    # cat $IN |
    # tee $s2 $aux1 $aux2 |
    # tail -n +2 |
    # paste $s2 - > $OUT &

    # ## The goal of this is to write the first line of $IN in the $AUX_HEAD
    # ## stream and the last line of $IN in $AUX_TAIL
    # cat $aux1 | ( head -n 1 > $AUX_HEAD; $PASH_TOP/evaluation/tools/drain_stream.sh ) &
    # ( tail -n 1 $aux2 > $AUX_TAIL; $PASH_TOP/evaluation/tools/drain_stream.sh ) &

    wait

    rm "$temp"
    rm "$s2"
    rm "$aux1"
    rm "$aux2"
    rm "$aux3"
}

# Reduce phase: merge the bi-gram outputs of two adjacent chunks.
#   $1 IN1, $2 AUX_HEAD1, $3 AUX_TAIL1 : first chunk's pairs, first, last line
#   $4 IN2, $5 AUX_HEAD2, $6 AUX_TAIL2 : second chunk's pairs, first, last line
#   $7 OUT, $8 AUX_HEAD_OUT, $9 AUX_TAIL_OUT : merged outputs
# The boundary pair (last line of chunk 1, first line of chunk 2) is built
# with paste and spliced between the two pair streams.
bigram_aux_reduce()
{
    IN1=$1
    AUX_HEAD1=$2
    AUX_TAIL1=$3
    IN2=$4
    AUX_HEAD2=$5
    AUX_TAIL2=$6
    OUT=$7
    AUX_HEAD_OUT=$8
    AUX_TAIL_OUT=$9

    temp=$(mktemp -u)
    mkfifo "$temp"

    cat "$AUX_HEAD1" > "$AUX_HEAD_OUT" &
    cat "$AUX_TAIL2" > "$AUX_TAIL_OUT" &
    paste "$AUX_TAIL1" "$AUX_HEAD2" > "$temp" &
    cat "$IN1" "$temp" "$IN2" > "$OUT" &

    wait

    rm "$temp"
}

# Make the helpers callable from subshells spawned by the benchmark driver.
export -f bigrams_aux
export -f bigram_aux_map
export -f bigram_aux_reduce
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
#!/bin/bash
# Find all 2-grams in a piece of text

IN=${IN:-$PASH_TOP/evaluation/benchmarks/expert-oneliners/10G.txt}

# Pull in bigrams_aux from the helper library (expected alongside this script).
. bi-gram.aux.sh

# Tokenize to one word per line, lowercase, then pair consecutive words.
cat "$IN" |
  tr -cs A-Za-z '\n' |
  tr A-Z a-z |
  bigrams_aux |
  sort |
  uniq
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
#!/bin/bash
# Compares two streams element by element
# Taken from https://crashingdaily.wordpress.com/2008/03/06/diff-two-stdout-streams/
# shuf() { awk 'BEGIN {srand(); OFMT="%.17f"} {print rand(), $0}' "$@" | sort -k1,1n | cut -d ' ' -f2-; }

IN=${IN:-$PASH_TOP/evaluation/benchmarks/expert-oneliners/10G.txt}

mkfifo s1 s2

# Quote the character classes so the shell cannot glob-expand them (SC2060).
cat "$IN" |
# shuf |
tr '[:lower:]' '[:upper:]' |
sort > s1 &

cat "$IN" |
# shuf |
tr '[:upper:]' '[:lower:]' |
sort > s2 &

diff -B s1 s2
rm s1 s2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Match complex regular-expression over input

IN=${IN:-$PASH_TOP/evaluation/benchmarks/expert-oneliners/10G.txt}

# Lowercase, then keep lines containing four characters that each recur
# later on the same line (back-references \1-\4).
cat "$IN" | tr A-Z a-z | grep '\(.\).*\1\(.\).*\2\(.\).*\3\(.\).*\4'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
#!/bin/bash
# Show the set-difference between two streams (i.e., elements in the first that are not in the second).
# https://stackoverflow.com/questions/2509533/bash-linux-set-difference-between-two-text-files

IN=${IN:-$PASH_TOP/evaluation/benchmarks/expert-oneliners/10G.txt}

mkfifo s1 s2

# BUGFIX: the original used `tr [:lower] [:upper]` (missing inner colons),
# which tr treats as literal character lists, not character classes.
cat "$IN" |
cut -d ' ' -f 1 |
tr '[:lower:]' '[:upper:]' |
sort > s1 &

cat "$IN" |
cut -d ' ' -f 1 |
sort > s2 &

comm -23 s1 s2
rm s1 s2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
#!/bin/bash
# Generate input files for the expert-oneliners benchmarks by repeatedly
# concatenating a downloaded 1 MB seed file into 10M/100M/1G (and, with
# --full, 3G/10G/100G) inputs, plus a sorted dictionary.

set -e

# call the script with its absolute name
cd "$(dirname "$0")"

curl 'http://ndr.md/data/dummy/1M.txt' > 1M.txt
# download wamerican-insane dictionary and sort according to machine
curl 'http://ndr.md/data/dummy/dict.txt' | sort > dict.txt

touch 10M.txt
for (( i = 0; i < 10; i++ )); do
  cat 1M.txt >> 10M.txt
done

touch 100M.txt
for (( i = 0; i < 10; i++ )); do
  cat 10M.txt >> 100M.txt
done

touch 1G.txt
for (( i = 0; i < 10; i++ )); do
  cat 100M.txt >> 1G.txt
done

if [ "$#" -eq 1 ] && [ "$1" = "--full" ]; then
  # fixed typo: was "Generting"
  echo Generating full-size inputs

  touch 3G.txt
  for (( i = 0; i < 3; i++ )); do
    cat 1G.txt >> 3G.txt
  done

  touch 10G.txt
  for (( i = 0; i < 10; i++ )); do
    cat 1G.txt >> 10G.txt
  done

  touch 100G.txt
  for (( i = 0; i < 10; i++ )); do
    cat 10G.txt >> 100G.txt
  done
fi
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Find the shortest scripts
# From "Wicked Cool Shell Scripts", 2nd Ed., pg. 7
# +p.95 multiple sed
# +p.XX crawler

IN=${IN:-$PASH_TOP/evaluation/benchmarks/expert-oneliners/10G.txt}

# $IN is expected to hold file names, one per line; classify each with
# file(1), keep shell scripts, count their lines, and report the 15 shortest.
cat "$IN" | xargs file | grep "shell script" | cut -d: -f1 | xargs -L 1 wc -l | grep -v '^0$' | sort -n | head -15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Calculate sort twice

IN=${IN:-$PASH_TOP/evaluation/benchmarks/expert-oneliners/10G.txt}

# Deliberately sorts twice (ascending then descending) as a benchmark
# workload; do not collapse into a single `sort -r`.
cat "$IN" | tr A-Z a-z | sort | sort -r
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
2+
# Sort input
3+
4+
IN=${IN:-$PASH_TOP/evaluation/benchmarks/expert-oneliners/10G.txt}
5+
6+
cat $IN | sort
7+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#!/bin/bash
2+
# Calculate mispelled words in an input
3+
# https://dl.acm.org/doi/10.1145/3532.315102
4+
5+
IN=${IN:-$PASH_TOP/evaluation/benchmarks/expert-oneliners/10G.txt}
6+
7+
cat $IN |
8+
# groff -t -e -mandoc -Tascii | # remove formatting commands
9+
col -bx | # remove backspaces / linefeeds
10+
tr -cs A-Za-z '\n' |
11+
tr A-Z a-z | # map upper to lower case
12+
tr -d '[:punct:]' | # remove punctuation
13+
sort | # put words in alphabetical order
14+
uniq | # remove duplicate words
15+
comm -23 - $dict # report words not in dictionary
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#!/bin/bash
2+
# Top-N (1000) terms
3+
# from https://dl.acm.org/doi/10.1145/5948.315654
4+
5+
IN=${IN:-$PASH_TOP/evaluation/benchmarks/expert-oneliners/10G.txt}
6+
7+
cat $IN | tr -cs A-Za-z '\n' | tr A-Z a-z | sort | uniq -c | sort -rn | sed 100q
8+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/bash
2+
# Calculate the frequency of each word in the document, and sort by frequency
3+
4+
IN=${IN:-$PASH_TOP/evaluation/benchmarks/expert-oneliners/10G.txt}
5+
6+
cat $IN | tr -cs A-Za-z'\n' | tr A-Z a-z | sort | uniq -c | sort -rn

evaluation/scripts/max-temp/max-temp.sh evaluation/benchmarks/max-temp/max-temp.sh

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
#!/bin/bash
2+
#Calculate maximum temperature across the US over five years
23

34
#NOTE: The `head -n 1 below is for minimizing the number of pages to be seen
45

56
# `seq` is similar to {1995..2005}, but this requires shell expansion rules that
67
# are quite convoluted
7-
seq 2005 2005 |
8+
seq 2015 2019 |
89
sed 's;^;http://ndr.md/data/noaa/;' |
910
sed 's;$;/;' |
1011
xargs -n 1 curl -s |
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
exodus
2+
genesis
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
inputs/*
22
intermediary/*
3+
*.txt

0 commit comments

Comments
 (0)