From 3cddc211f769025215890253ca88c2497457897a Mon Sep 17 00:00:00 2001
From: kylebd99
Date: Wed, 20 Dec 2023 11:00:12 -0800
Subject: [PATCH] add query size graphs

---
 Experiments/Experiments.jl             |  1 +
 Experiments/Scripts/query_size_exps.jl | 69 ++++++++++++++++++++++++++
 Experiments/graph_results.jl           | 27 ++++++++--
 3 files changed, 92 insertions(+), 5 deletions(-)
 create mode 100644 Experiments/Scripts/query_size_exps.jl

diff --git a/Experiments/Experiments.jl b/Experiments/Experiments.jl
index 9ad746d..bb4fb66 100644
--- a/Experiments/Experiments.jl
+++ b/Experiments/Experiments.jl
@@ -8,6 +8,7 @@ using Parquet2: Dataset
 using DelimitedFiles: writedlm
 using BenchmarkTools
 using Random
+using Printf
 
 include("../Source/CardinalityWithColors.jl")
 include("utils.jl")
diff --git a/Experiments/Scripts/query_size_exps.jl b/Experiments/Scripts/query_size_exps.jl
new file mode 100644
index 0000000..df726f4
--- /dev/null
+++ b/Experiments/Scripts/query_size_exps.jl
@@ -0,0 +1,69 @@
+# Plots runtime and relative error against query size, grouped by experiment description.
+using Profile
+include("../Experiments.jl")
+
+#datasets = [human, aids, lubm80, yeast, hprd, dblp, youtube, eu2005, patents, wordnet]
+datasets = [human, aids, lubm80, yeast, dblp, youtube, eu2005, patents]
+
+experiment_params = Vector{ExperimentParams}()
+for dataset in datasets
+    push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
+                                                dataset=dataset,
+                                                partitioning_scheme=[(QuasiStable, 64)],
+                                                description = "AvgQ64"))
+    push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
+                                                dataset=dataset,
+                                                partitioning_scheme=[(QuasiStable, 32), (NeighborNodeLabels, 32),(QuasiStable, 32), (NeighborNodeLabels, 32)],
+                                                description = "AvgQ64N64"))
+
+    push!(experiment_params, ExperimentParams(deg_stats_type=MinDegStats,
+                                                dataset=dataset,
+                                                partitioning_scheme=[(QuasiStable, 64)],
+                                                description = "MinQ64"))
+
+    push!(experiment_params, ExperimentParams(deg_stats_type=MaxDegStats,
+                                                dataset=dataset,
+                                                partitioning_scheme=[(QuasiStable, 64)],
+                                                description = "MaxQ64"))
+
+    push!(experiment_params, ExperimentParams(deg_stats_type=MaxDegStats,
+                                                dataset=dataset,
+                                                partitioning_scheme=[(Hash, 64)],
+                                                max_cycle_size = -1,
+                                                description = "BSK"))
+
+    push!(experiment_params, ExperimentParams(deg_stats_type=AvgDegStats,
+                                                dataset=dataset,
+                                                partitioning_scheme=[(QuasiStable, 1)],
+                                                max_cycle_size = -1,
+                                                description = "IndEst"))
+end
+
+#build_experiments(experiment_params)
+
+#run_estimation_experiments(experiment_params)
+
+graph_grouped_boxplot_with_comparison_methods(experiment_params;
+                                                ylims=[10^-5, 10^4],
+                                                y_ticks=[10^-5, 10^-2, 10^0, 10^2, 10^4],
+                                                x_type = query_size,
+                                                y_type = runtime,
+                                                grouping=description,
+                                                dimensions = (1450, 550),
+                                                legend_pos=:topleft,
+                                                y_label="Runtime 10^ (s)",
+                                                x_label = "Query Size",
+                                                filename="query_size_runtime")
+
+# Float64 powers below: the integer literals 10^20 and 10^21 silently overflow Int64.
+graph_grouped_boxplot_with_comparison_methods(experiment_params;
+                                                ylims=[10.0^-21, 10.0^21],
+                                                x_type = query_size,
+                                                y_ticks=[10.0^-20, 10.0^-15, 10.0^-10, 10.0^-5, 10.0^-2, 10.0^0, 10.0^2, 10.0^5, 10.0^10, 10.0^15, 10.0^20],
+                                                y_type = estimate_error,
+                                                grouping=description,
+                                                dimensions = (1450, 550),
+                                                legend_pos=:bottomleft,
+                                                y_label="Relative Error 10^",
+                                                x_label = "Query Size",
+                                                filename="query_size_error")
diff --git a/Experiments/graph_results.jl b/Experiments/graph_results.jl
index 50f57bd..1a5ee24 100644
--- a/Experiments/graph_results.jl
+++ b/Experiments/graph_results.jl
@@ -107,6 +107,19 @@ function get_query_id(dataset, query_path)
     end
 end
 
+"""
+    query_size_category(s)
+
+Return the smallest bucket in `[3, 4, 6, 9, 12, 16, 24, 32]` that is `>= s`.
+Sizes above the largest bucket are clamped into it, so the result is always an
+integer (never `nothing`) and can be formatted with `@sprintf "%02i"`.
+"""
+function query_size_category(s)
+    categories = [3, 4, 6, 9, 12, 16, 24, 32]
+    idx = findfirst(>=(s), categories)
+    return categories[something(idx, lastindex(categories))]
+end
+
 function graph_grouped_boxplot_with_comparison_methods(experiment_params_list::Vector{ExperimentParams};
                                                         x_type::GROUP=dataset,
                                                         y_type::VALUE=estimate_error,
@@ -136,7 +149,7 @@ function graph_grouped_boxplot_with_comparison_methods(experiment_params_list::V
             current_x = if x_type == dataset
                 data
             elseif x_type == query_size
-                results_df[i, :QuerySize]
+                @sprintf "%02i" query_size_category(results_df[i, :QuerySize])
             end
             current_group = string(grouping == query_type ? results_df[i, :QueryType] : get_value_from_param(experiment_params, grouping))
             current_y = if y_type == estimate_error
@@ -146,7 +159,7 @@ function graph_grouped_boxplot_with_comparison_methods(experiment_params_list::V
             end
             true_card[(data, get_query_id(string(experiment_params.dataset), results_df[i, :QueryPath]))] = (results_df[i, :TrueCard], current_x)
             # push the errors and their groupings into the correct vector
-            push!(x_values, current_x)
+            push!(x_values, string(current_x))
             push!(y_values, current_y)
             push!(estimators, current_group)
         end
@@ -179,13 +192,17 @@ function graph_grouped_boxplot_with_comparison_methods(experiment_params_list::V
                 runtime / 1000.0
             end
             # push the errors and their groupings into the correct vector
-            push!(x_values, current_x)
+            push!(x_values, string(current_x))
             push!(y_values, current_y)
             push!(estimators, estimator)
         end
     end
+    # Sort the parallel vectors by x label; `collect` first because `sort` on a bare iterator needs Julia >= 1.10.
+    sorted_vals = sort(collect(zip(x_values, y_values, estimators)), by=first)
+    x_values = [x[1] for x in sorted_vals]
+    y_values = [x[2] for x in sorted_vals]
+    estimators = [x[3] for x in sorted_vals]
     println("starting graphs")
-
     # This seems to be necessary for using Plots.jl outside of the ipynb framework.
     # See this: https://discourse.julialang.org/t/deactivate-plot-display-to-avoid-need-for-x-server/19359/15
     ENV["GKSwstype"]="100"
@@ -196,7 +213,7 @@ function graph_grouped_boxplot_with_comparison_methods(experiment_params_list::V
                             y_ticks = [log10(y) for y in y_ticks],
                             legend = legend_pos,
                             size = dimensions,
-                            bottom_margin = 20px,
+                            bottom_margin = 40px,
                             top_margin = 20px,
                             left_margin = 10mm,
                             legend_column = 2,