Skip to content

Commit

Permalink
some updates to graphing
Browse files Browse the repository at this point in the history
  • Loading branch information
Diandre Sabale committed Dec 26, 2023
2 parents 681062a + 03ec42e commit 2f6862d
Show file tree
Hide file tree
Showing 23 changed files with 848 additions and 289 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@
/TrueCardinalities
/Experiments/Results/*
/Experiments/SerializedSummaries/*

/.vscode

31 changes: 20 additions & 11 deletions Experiments/Experiments.jl
Original file line number Diff line number Diff line change
@@ -1,19 +1,28 @@
# Overall Experiments Harness Include File
using Serialization: serialize, deserialize
using BenchmarkTools
using Plots
using Plots.PlotMeasures
using StatsPlots
using CSV, DataFrames
using Parquet2: Dataset
using DelimitedFiles: writedlm
using BenchmarkTools
using Printf
using Random
using StatsPlots
using Distributed
@everywhere using CSV
@everywhere using DataFrames
@everywhere using DelimitedFiles: writedlm
@everywhere using Parquet2: Dataset
@everywhere using Random
@everywhere using Serialization: serialize, deserialize
@everywhere using SharedArrays
@everywhere using WeakRefStrings


include("../Source/CardinalityWithColors.jl")
include("utils.jl")
include("load_datasets.jl")
@everywhere include("../Source/CardinalityWithColors.jl")
@everywhere include("utils.jl")
@everywhere include("load_datasets.jl")
include("load_querysets.jl")
include("build_color_summaries.jl")
@everywhere include("build_color_summaries.jl")
include("get_true_cardinalities.jl")
include("run_estimators.jl")
@everywhere include("run_estimators.jl")
include("graph_results.jl")

const TIMEOUT_SEC::Float64 = 60.0
1 change: 0 additions & 1 deletion Experiments/Scripts/coloring_methods.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ partitioning_schemes = [
[(Hash, 64)],
[(Degree, 8), (QuasiStable, 32), (NeighborNodeLabels, 24)],
[(Degree, 8), (NeighborNodeLabels, 24), (QuasiStable, 32)]]
partitioning_schemes = [[(QuasiStable, 64)]]
experiment_params = Vector{ExperimentParams}()
for dataset in datasets
for scheme in partitioning_schemes
Expand Down
81 changes: 66 additions & 15 deletions Experiments/Scripts/comparison_exps.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,12 @@
using Profile
include("../Experiments.jl")

#datasets = [aids, yeast, hprd, dblp, youtube, wordnet]
datasets = [yeast]
#datasets = [human, aids, lubm80, yeast, hprd, dblp, youtube, eu2005, patents, wordnet]
datasets = [human, aids, lubm80, yeast, dblp, youtube, eu2005, patents]
#datasets = [human, aids, lubm80]

experiment_params = Vector{ExperimentParams}()
for dataset in datasets
push!(experiment_params, ExperimentParams(deg_stats_type=CorrDegStats,
dataset=dataset,
partitioning_scheme=[(QuasiStable, 64)],
description = "CorrQ64"))
push!(experiment_params, ExperimentParams(deg_stats_type=CorrDegStats,
dataset=dataset,
partitioning_scheme=[(QuasiStable, 32), (NeighborNodeLabels, 32),(QuasiStable, 32), (NeighborNodeLabels, 32)],
description = "CorrQ64N64"))
push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
dataset=dataset,
partitioning_scheme=[(QuasiStable, 64)],
Expand All @@ -23,23 +16,81 @@ for dataset in datasets
dataset=dataset,
partitioning_scheme=[(QuasiStable, 32), (NeighborNodeLabels, 32),(QuasiStable, 32), (NeighborNodeLabels, 32)],
description = "AvgQ64N64"))
#=
push!(experiment_params, ExperimentParams(deg_stats_type=MinDegStats,
dataset=dataset,
partitioning_scheme=[(QuasiStable, 64)],
max_cycle_size = -1,
description = "MinQ64"))
push!(experiment_params, ExperimentParams(deg_stats_type=MaxDegStats,
dataset=dataset,
partitioning_scheme=[(QuasiStable, 64)],
max_cycle_size = -1,
description = "MaxQ64"))
push!(experiment_params, ExperimentParams(deg_stats_type=MaxDegStats,
dataset=dataset,
partitioning_scheme=[(Hash, 64)],
description = "MaxH64"))
max_cycle_size = -1,
inference_max_paths = 10^30,
use_partial_sums = false,
description = "BSK"))
push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
dataset=dataset,
partitioning_scheme=[(QuasiStable, 1)],
max_cycle_size = -1,
description = "IndEst")) =#
end

build_experiments(experiment_params)
#build_experiments(experiment_params)

run_estimation_experiments(experiment_params; timeout=1.0)

order = [string(data) for data in datasets]

graph_grouped_boxplot_with_comparison_methods(experiment_params;
ylims=[10^-5, 10^4],
y_ticks=[10^-5, 10^-4, 10^-3, 10^-2, 10^-1, 10^0, 10^1, 10^2, 10^3, 10^4],
y_type = runtime,
x_type = dataset,
x_order = order,
grouping=description,
dimensions = (1450, 550),
legend_pos=:top,
y_label="Inference Latency 10^ (s)",
filename="overall_runtime")

graph_grouped_boxplot_with_comparison_methods(experiment_params;
ylims=[10^-21, 10^21],
y_ticks=[10^-20, 10^-15, 10^-10, 10^-5, 10^-2, 10^0, 10^2, 10^5, 10^10, 10^15, 10^20],
y_type = estimate_error,
x_type = dataset,
x_order = order,
grouping=description,
dimensions = (1450, 550),
legend_pos=:bottomleft,
y_label="Relative Error 10^",
filename="overall_error")


run_estimation_experiments(experiment_params)
graph_grouped_bar_plot(experiment_params;
grouping=description,
y_type=memory_footprint,
x_order = order,
ylims=[0, 50],
y_ticks = [10, 20, 30, 40, 50],
legend_pos=:topright,
dimensions = (1000, 550),
y_label="Memory (MBs)",
filename="overall_memory")

graph_grouped_boxplot_with_comparison_methods(experiment_params; ylims=[10^-5, 10^2], y_type = runtime, grouping=description, y_label="Runtime (s)", filename="comparison_exps_runtime_2")
graph_grouped_boxplot_with_comparison_methods(experiment_params; ylims=[10^-10, 10^15], y_type = estimate_error, grouping=description, y_label="Relative Error", filename="comparison_exps_error_2")
graph_grouped_bar_plot(experiment_params;
grouping=description,
y_type=build_time,
x_order = order,
ylims=[0, 1600],
y_ticks = [200, 400, 600, 800, 1000, 1200, 1400, 1600],
dimensions = (1000, 550),
y_label="Build Time (s)",
filename="overall_build_time")
94 changes: 94 additions & 0 deletions Experiments/Scripts/estimator-failure.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Estimator failure analysis.
#
# For every comparison method returned by `comparison_dataset()` and every
# dataset below, count the queries on which the method "failed" and turn the
# count into a per-dataset failure probability. A query counts as a failure
# when the method has no recorded result for it, or when its recorded
# estimate is 0, Inf, or NaN.
include("../Experiments.jl")

#datasets = [human, aids]
datasets = [human, aids, lubm80, yeast, dblp, youtube, eu2005, patents]
queries = load_querysets(datasets)
# Number of queries per dataset, keyed by the dataset's string name.
num_queries = Dict(string(dataset)=>length(queries[dataset]) for dataset in datasets)

methods, comparison_results = comparison_dataset()

# failure_counts[method] is a counter keyed by dataset name;
# failure_probabilities[method] maps dataset name => fraction of failed queries.
failure_counts = Dict()
failure_probabilities = Dict()
for method in methods
    failure_counts[method] = counter(String)
    failure_probabilities[method] = Dict()
    for dataset in datasets
        string_dataset = string(dataset)
        for query in queries[dataset]
            qid = get_query_id(string_dataset, query.query_path)
            comp_key = (string_dataset, method, qid)
            if !haskey(comparison_results, comp_key)
                # No result recorded for this (dataset, method, query) at all.
                inc!(failure_counts[method], string_dataset)
            else
                estimate = comparison_results[comp_key].Estimate
                # NaN never compares equal to anything (including itself), so
                # it has to be detected with `isnan` rather than `== NaN`.
                if estimate == 0 || estimate == Inf || isnan(estimate)
                    inc!(failure_counts[method], string_dataset)
                end
            end
        end
        # Keyed by the dataset's string name, like `num_queries` and
        # `failure_counts`, so every per-method dictionary is indexed the
        # same way.
        failure_probabilities[method][string_dataset] =
            failure_counts[method][string_dataset] / num_queries[string_dataset]
    end
end

"""
    record_estimator_failures!(counts, probabilities, label, params,
                               string_dataset, total_queries)

Run the estimation experiment described by `params`, read the results CSV it
is expected to leave under `Experiments/Results/`, and add one to
`counts[label][string_dataset]` for every row whose `Failure` column is true.
Afterwards store `counts[label][string_dataset] / total_queries` in
`probabilities[label][string_dataset]`.

`counts[label]` and `probabilities[label]` must already exist.
"""
function record_estimator_failures!(counts, probabilities, label, params,
                                    string_dataset, total_queries)
    run_estimation_experiments([params]; timeout=TIMEOUT_SEC)
    results_path = "Experiments/Results/Estimation_" * params_to_results_filename(params)
    results_df = CSV.read(results_path, DataFrame; normalizenames=true)
    for failed in results_df[!, :Failure]
        failed && inc!(counts[label], string_dataset)
    end
    probabilities[label][string_dataset] = counts[label][string_dataset] / total_queries
    return nothing
end

# Our own estimator configurations, tracked next to the comparison methods.
for label in ("BSK", "BSK++", "AvgQ64")
    failure_counts[label] = counter(String)
    failure_probabilities[label] = Dict()
end

for dataset in datasets
    string_dataset = string(dataset)
    total_queries = num_queries[string_dataset]

    bsk_params = ExperimentParams(deg_stats_type=MaxDegStats,
                                  dataset=dataset,
                                  partitioning_scheme=[(Hash, 64)],
                                  max_cycle_size = -1,
                                  inference_max_paths = 10^30,
                                  use_partial_sums = false,
                                  description = "BSK",
                                  n_replications = 1)
    record_estimator_failures!(failure_counts, failure_probabilities, "BSK",
                               bsk_params, string_dataset, total_queries)

    # Same configuration as BSK except that partial sums are enabled.
    bsk_agg_params = ExperimentParams(deg_stats_type=MaxDegStats,
                                      dataset=dataset,
                                      partitioning_scheme=[(Hash, 64)],
                                      max_cycle_size = -1,
                                      inference_max_paths = 10^30,
                                      use_partial_sums = true,
                                      description = "BSK++",
                                      n_replications=1)
    record_estimator_failures!(failure_counts, failure_probabilities, "BSK++",
                               bsk_agg_params, string_dataset, total_queries)

    # Relies on the ExperimentParams defaults for everything but the dataset;
    # NOTE(review): labelled "AvgQ64" on the assumption that those defaults are
    # AvgDegStats with a 64-color QuasiStable partitioning -- confirm.
    avg_params = ExperimentParams(dataset=dataset, n_replications=1)
    record_estimator_failures!(failure_counts, failure_probabilities, "AvgQ64",
                               avg_params, string_dataset, total_queries)
end
10 changes: 5 additions & 5 deletions Experiments/Scripts/just-edge-updates-experiments.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ datasets::Vector{DATASET} = [aids, human]
# datasets::Vector{DATASET} = [aids, human, yeast, wordnet, youtube, dblp, patents]
# datasets::Vector{DATASET} = [aids, human, lubm80, yeast, hprd, wordnet, dblp, youtube, eu2005, patents]
max_cycles = 6
proportions_updated = [0, 0.2, 0.4, 0.6, 0.8, 1.0]
proportions_updated = [0, 0.2, 0.4, 0.6, 0.8, .9, 1.0]
# To test deletion, we will add a random node / edge and then delete them...
# proportion_not_updated = 0.5

Expand Down Expand Up @@ -94,7 +94,7 @@ experiment_params_list::Vector{ExperimentParams} = [ExperimentParams(dataset=cur
# graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=estimate_error, grouping=proportion_not_updated, filename="overall-accuracy-and-updates")
# compare how cycle stat accuracies are affected by summary updates
# graph_grouped_box_plot(experiment_params_list, x_type=proportion_deleted, y_type=estimate_error, x_label="proportion added then deleted", y_label="accuracy", grouping=cycle_size, filename="deletion-experiment")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=build_time, y_lims=[0, 30], x_label="Proportion Updated", y_label="Build Time (S)", grouping=proportion_updated, filename="just-edge-updates-build")
graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=estimate_error, x_label="Proportion Updated", y_label="Estimate Error", grouping=proportion_updated, filename="just-edge-updates-error")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=runtime, y_lims=[0, 0.6], x_label="Proportion Updated", y_label="Runtime (S)", grouping=proportion_updated, filename="just-edge-updates-runtime")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=memory_footprint, y_lims=[0, 20], x_label="Proportion Updated", y_label="Memory Footprint (B)", grouping=proportion_updated, filename="just-edge-updates-memory")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=build_time, y_lims=[0, 10], y_ticks = [0, 2, 4 ,6 ,8, 10], legend_pos=:topright, x_label="Proportion Updated", y_label="Build Time (S)", grouping=proportion_updated, filename="just-edge-updates-build")
graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=estimate_error,y_lims=[-20, 15], x_label="Proportion Updated", y_label="Estimate Error", grouping=proportion_updated, filename="just-edge-updates-error")
graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=runtime, y_lims=[10^-5, 10], y_ticks = [10^-5, 10^-4, 10^-3, 10^-2, 10^-1, 1, 10], x_label="Proportion Updated", y_label="Runtime (S)", grouping=proportion_updated, filename="just-edge-updates-runtime")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=memory_footprint, y_lims=[0, 20], y_ticks = [0, 5, 10, 15, 20], x_label="Proportion Updated", y_label="Memory (MB)", grouping=proportion_updated, filename="just-edge-updates-memory")
Loading

0 comments on commit 2f6862d

Please sign in to comment.