Synthesizer #4

Open
wants to merge 6 commits into base: main
2 changes: 2 additions & 0 deletions py-2/benchmarks/nginx/.gitignore
@@ -0,0 +1,2 @@
inputs/
outputs/
1 change: 1 addition & 0 deletions py-2/benchmarks/nginx/checksum.md5
@@ -0,0 +1 @@
3fe6814c6d6f2edd73a83c35f45aa024 results/nginx.sh.out
7 changes: 7 additions & 0 deletions py-2/benchmarks/nginx/cleanup.sh
@@ -0,0 +1,7 @@
#!/bin/bash

REPO_TOP=$(git rev-parse --show-toplevel)
results_dir="${REPO_TOP}/covid-mts/results"

echo "Cleaning up outputs..."
rm -rf $results_dir
1 change: 1 addition & 0 deletions py-2/benchmarks/nginx/execution.sh
@@ -0,0 +1 @@
## Run this file to execute the whole benchmark, including the aggregator runs.
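# A rough sketch of what this wrapper is presumably meant to run, based on the
# other scripts in this directory (the exact flags below are assumptions):
#   ./input.sh --kaggle    # fetch the access log
#   ./run.sh --small       # time the plain bash and aggregator variants
#   ./verify.sh            # check outputs against checksum.md5
#   ./cleanup.sh           # remove generated results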
30 changes: 30 additions & 0 deletions py-2/benchmarks/nginx/input.sh
@@ -0,0 +1,30 @@
#!/bin/bash
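# Usage:
#   ./input.sh --kaggle   # requires a Kaggle API token at ~/.kaggle/kaggle.json
#   ./input.sh            # fallback download; not implemented yet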

REPO_TOP=$(git rev-parse --show-toplevel)
DIR=$REPO_TOP/analysis-logs/input
mkdir -p $DIR
cd $DIR

if [[ $1 == "--kaggle" ]]; then
# Set up Kaggle API
if [[ ! -f ~/.kaggle/kaggle.json ]]; then
mkdir -p ~/.kaggle
echo "Place your kaggle.json in the ~/.kaggle directory, then re-run."
exit 1
fi
chmod 600 ~/.kaggle/kaggle.json

if [[ ! -f access.log ]]; then # skip the download if the log is already present
kaggle datasets download -d eliasdabbas/web-server-access-logs
unzip web-server-access-logs
rm -f web-server-access-logs.zip client_hostname.csv
fi
else
if [[ ! -f nginx.zip ]]; then
# TODO: replace with omega URL
# wget -O nginx.zip "https://dataverse.harvard.edu/api/access/datafile/:persistentId?persistentId=doi:10.7910/DVN/3QBYB5/NXKB6J"
# unzip web-server-access-logs
# rm -f web-server-access-logs.zip
echo "Not implemented yet."
exit 1
fi
fi
2 changes: 2 additions & 0 deletions py-2/benchmarks/nginx/log.txt
@@ -0,0 +1,2 @@
Running aggregators for script: ./scripts/1.sh and input file: 1
Running aggregators for script: ./scripts/1.sh and input file: 1
77 changes: 77 additions & 0 deletions py-2/benchmarks/nginx/run.sh
@@ -0,0 +1,77 @@
#!/bin/bash
export SUITE_DIR=$(realpath $(dirname "$0"))
export TIMEFORMAT=%R
cd $SUITE_DIR

if [[ "$1" == "--small" ]]; then
echo "Using small input"
export INPUT="$SUITE_DIR/inputs/access.log"
else
# no larger default input is wired up yet; fall back to the same access log (assumption)
echo "Using default input"
export INPUT="$SUITE_DIR/inputs/access.log"
fi
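# NOTE: the variables below are used by the functions that follow but were never
# defined in this draft; the values here are assumptions chosen so the script can
# run end to end.
input_file="$INPUT"
all_res_file="./outputs/all.res"
mode_res_file="./outputs/mode.res"
mkdir -p ./outputs/bash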

# ////////
# original script
# REPO_TOP=$(git rev-parse --show-toplevel)

# eval_dir="${REPO_TOP}/analysis-logs"
# results_dir="${eval_dir}/results"
# inputs_dir="${eval_dir}/input"

# shell="/bin/bash"

# mkdir -p $results_dir

# export INPUT=${inputs_dir}/access.log
# script="${eval_dir}/nginx.sh"

# echo "Executing $(basename "$script")"
# $shell "$script" > "$results_dir/$(basename "$script").out"

# ////////

covid-mts_bash() {
for number in $(seq 7); do
script="${number}"
script_file="./scripts/$script.sh"
output_dir="./outputs/bash/$script/"
output_file="./outputs/bash/$script.out"
time_file="./outputs/bash/$script.time"
log_file="./outputs/bash/$script.log"

{ time bash $script_file $input_file >$output_file; } 2>$time_file # run the script and capture its timing

cat "${time_file}" >>$all_res_file
echo "$script_file $(cat "$time_file")" | tee -a $mode_res_file
done
}

ID=1 # track agg run

covid-mts_agg() {
AGG_FILE="../agg_run.sh"
chmod +x $AGG_FILE
mkdir -p "outputs/agg"
echo "executing nginx agg $(date)" | tee -a $mode_res_file $all_res_file

for number in $(seq 7); do
script="${number}"
script_file="./scripts/$script.sh"
output_dir="./outputs/agg/$script/"
output_file="./outputs/agg/$script.out"
time_file="./outputs/agg/$script.time"
log_file="./outputs/agg/$script.log"
{ time $AGG_FILE $script_file $input_file $ID covid-mts >$output_file; } 2>$time_file # run the script through the aggregator and capture timing

cat "${time_file}" >>$all_res_file
echo "$script_file $(cat "$time_file")" | tee -a $mode_res_file
((ID++))
done
}

covid-mts_bash
covid-mts_agg
23 changes: 23 additions & 0 deletions py-2/benchmarks/nginx/scripts/1.sh
@@ -0,0 +1,23 @@
#!/bin/bash
# Count of requests per HTTP response code
# (the commented-out pipeline below is the covid-mts template this script was adapted from)

# <in.csv sed 's/T..:..:..//' |
# awk -F, '!seen[$1 $3] {onroad[$1]++; seen[$1 $3] = 1}
# END { OFS = "\t"; for (d in onroad) print d, onroad[d]}' |
# sort > out1

# curl https://balab.aueb.gr/~dds/oasa-$(date --date='1 days ago' +'%y-%m-%d').bz2 |
# bzip2 -d | # decompress
# Replace the line below with the two lines above to stream the latest file
# cat "$1" | # assumes saved input
# sed 's/T..:..:..//' | # hide times
# cut -d ',' -f 1,3 | # keep only day and bus no
# sort -u | # remove duplicate records due to time
# cut -d ',' -f 1 | # keep all dates
# sort | # preparing for uniq
# uniq -c | # count unique dates
# awk "{print \$2,\$1}" # print first date, then count

# # diff out{1,}

cut -d "\"" -f3 ${INPUT} | cut -d ' ' -f2 | sort | uniq -c | sort -rn
24 changes: 24 additions & 0 deletions py-2/benchmarks/nginx/scripts/2.sh
@@ -0,0 +1,24 @@
#!/bin/bash
# Count of requests per HTTP response code (awk variant)

# <in.csv sed 's/T..:..:..//' |
# awk -F, '!seen[$1 $3] {onroad[$3]++; seen[$1 $3] = 1}
# END { OFS = "\t"; for (d in onroad) print d, onroad[d]}' |
# sort -k2n >out1

# curl https://balab.aueb.gr/~dds/oasa-$(date --date='1 days ago' +'%y-%m-%d').bz2 |
# bzip2 -d | # decompress
# Replace the line below with the two lines above to stream the latest file
# cat "$1" | # assumes saved input
# sed 's/T..:..:..//' | # hide times
# cut -d ',' -f 3,1 | # keep only day and bus ID
# sort -u | # removing duplicate day-buses
# cut -d ',' -f 2 | # keep only bus ID
# sort | # preparing for uniq
# uniq -c | # count unique dates
# sort -k 1 -n | # sort in reverse numerical order
# awk "{print \$2,\$1}" # print first date, then count

# diff out{1,}

awk '{print $9}' ${INPUT} | sort | uniq -c | sort -rn
3 changes: 3 additions & 0 deletions py-2/benchmarks/nginx/scripts/3.sh
@@ -0,0 +1,3 @@
#!/bin/bash
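# Most frequently requested URLs that returned 404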

awk '($9 ~ /404/)' ${INPUT} | awk '{print $7}' | sort | uniq -c | sort -rn
23 changes: 23 additions & 0 deletions py-2/benchmarks/nginx/scripts/4.sh
@@ -0,0 +1,23 @@
#!/bin/bash
# Most frequently requested URLs that returned 502 (bad gateway)

# <in.csv sed 's/T\(..\):..:../,\1/' |
# awk -F, '!seen[$1 $2] {hours[$1]++; seen[$1 $2] = 1}
# END { OFS = "\t"; for (d in hours) print d, hours[d]}' |
# sort

# curl https://balab.aueb.gr/~dds/oasa-$(date --date='1 days ago' +'%y-%m-%d').bz2 |
# bzip2 -d | # decompress
# Replace the line below with the two lines above to stream the latest file
# cat "$1" | # assumes saved input
# sed 's/T\(..\):..:../,\1/' | # keep times only
# cut -d ',' -f 1,2 | # keep only time and date
# sort -u | # removing duplicate entries
# cut -d ',' -f 1 | # keep only date
# sort | # preparing for uniq
# uniq -c | # count unique dates
# awk "{print \$2,\$1}" # print first date, then count

# # diff out{1,}

awk '($9 ~ /502/)' ${INPUT} | awk '{print $7}' | sort | uniq -c | sort -r
5 changes: 5 additions & 0 deletions py-2/benchmarks/nginx/scripts/5.sh
@@ -0,0 +1,5 @@
#!/bin/bash
# Client IPs that requested the WordPress install script (/wp-admin/install.php)

awk -F\" '($2 ~ "/wp-admin/install.php"){print $1}' ${INPUT} | awk '{print $1}' | sort | uniq -c | sort -r
1 change: 1 addition & 0 deletions py-2/benchmarks/nginx/scripts/6.sh
@@ -0,0 +1 @@
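#!/bin/bash
# Top 20 most requested .php URLs that returned 404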
cat $1 | awk '($9 ~ /404/)' ${INPUT} | awk -F\" '($2 ~ "^GET .*\.php")' | awk '{print $7}' | sort | uniq -c | sort -r | head -n 20
1 change: 1 addition & 0 deletions py-2/benchmarks/nginx/scripts/7.sh
@@ -0,0 +1 @@
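#!/bin/bash
# Most frequently requested URL paths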
cat $1 | awk -F\" '{print $2}' ${INPUT} | awk '{print $2}' | sort | uniq -c | sort -r
1 change: 1 addition & 0 deletions py-2/benchmarks/nginx/scripts/8.sh
@@ -0,0 +1 @@
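#!/bin/bash
# URL paths of requests whose request line contains "ref"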
cat $1 | awk -F\" '($2 ~ "ref"){print $2}' ${INPUT} | awk '{print $2}' | sort | uniq -c | sort -r
15 changes: 15 additions & 0 deletions py-2/benchmarks/nginx/verify.sh
@@ -0,0 +1,15 @@
#!/bin/bash

REPO_TOP=$(git rev-parse --show-toplevel)

eval_dir="${REPO_TOP}/analysis-logs/"
results_dir="${eval_dir}/results"
input_dir="${eval_dir}/input"

if [ "$(md5sum $results_dir/* | awk '{print $1}')" == "$(cat $input_dir/checksum.md5 | awk '{print $1}')" ];
then
echo "Valid"
else
echo "Invalid"
exit 1
fi