Skip to content

Commit

Permalink
fix uniform sampling strategy
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebd99 committed Dec 21, 2023
1 parent c7bfd62 commit 999c880
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 16 deletions.
8 changes: 4 additions & 4 deletions Experiments/Scripts/run-cycle-experiments.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@ using Plots.PlotMeasures
include("../Experiments.jl")

# datasets::Vector{DATASET} = [aids, human, lubm80, yago, yeast, hprd, wordnet, dblp, youtube, eu2005, patents]
data = [yeast]
datasets = [youtube]
max_cycles = 6
experiment_params_list::Vector{ExperimentParams} = [ExperimentParams(dataset=current_dataset, max_cycle_size=current_size) for current_dataset in data for current_size in 2:max_cycles]
experiment_params_list = ExperimentParams[ExperimentParams(dataset=current_dataset, max_cycle_size=current_size) for current_dataset in datasets for current_size in 1:max_cycles]

# println("started building")
#build_experiments(experiment_params_list)
build_experiments(experiment_params_list)
# println("started estimating")
#run_estimation_experiments(experiment_params_list)
run_estimation_experiments(experiment_params_list)
println("started graphing")

graph_grouped_box_plot(experiment_params_list;
Expand Down
25 changes: 16 additions & 9 deletions Experiments/Scripts/run-inference-sampling-experiments.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,41 @@ using Plots.PlotMeasures
include("../Experiments.jl")

# datasets::Vector{DATASET} = [aids, human, lubm80, yago, yeast, hprd, wordnet, dblp, youtube, eu2005, patents]
data::Vector{DATASET} = [yeast]
max_paths = [10, 100, 1000, 10000]
experiment_params_list::Vector{ExperimentParams} = [ExperimentParams(dataset=current_dataset, inference_max_paths=current_paths) for current_dataset in data for current_paths in max_paths]
datasets = [youtube]
max_paths = [10, 100, 500, 2000, 10000]
experiment_params_list = ExperimentParams[]
for dataset in datasets
for current_paths in max_paths
push!(experiment_params_list, ExperimentParams(dataset=dataset, inference_max_paths=current_paths, sampling_strategy = redistributive, description="Importance"))
push!(experiment_params_list, ExperimentParams(dataset=dataset, inference_max_paths=current_paths, sampling_strategy = uniform, description="Uniform"))
end
end

println("started building")
#build_experiments(experiment_params_list)
println("started estimating")
#run_estimation_experiments(experiment_params_list)
println("started graphing")
graph_grouped_box_plot(experiment_params_list,
x_type=dataset,
x_type=inference_paths,
y_type=estimate_error,
ylims=[10^-20, 10^20],
y_ticks=[10^-20, 10^-15, 10^-10, 10^-5, 1, 10^5, 10^10, 10^15],
dimensions = (600, 550),
dimensions = (600, 400),
legend_pos = :topleft,
x_label="Maximum Inference Paths",
y_label="Estimate Error 10^",
grouping=inference_paths,
grouping=description,
filename="inference-paths-error")

graph_grouped_box_plot(experiment_params_list,
x_type=dataset,
x_type=inference_paths,
y_type=runtime,
ylims=[.0001, 100],
y_ticks=[.001, .01, .1, 1, 10, 100],
dimensions = (600, 550),
dimensions = (600, 400),
legend_pos = :topleft,
x_label="Maximum Inference Paths",
y_label="Runtime 10^ (s)",
grouping=inference_paths,
grouping=description,
filename="inference-paths-runtime")
2 changes: 1 addition & 1 deletion Experiments/graph_results.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ function graph_grouped_box_plot(experiment_params_list::Vector{ExperimentParams}
[log10(y) for y in y_values],
group = groups,
x_ticks = x_ticks,
xlims = [0, length(x_order)],
xlims = [0, length(x_order) + .5],
ylims = (log10(ylims[1]),log10(ylims[2])),
y_ticks = [log10(y) for y in y_ticks],
legend = legend_pos,
Expand Down
3 changes: 3 additions & 0 deletions Experiments/run_estimators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ function run_estimation_experiments(experiment_params_list::Vector{ExperimentPar
only_shortest_path_cycle= experiment_params.only_shortest_path_cycle)) for _ in 1:3]
estimate_time = median([x.time for x in estimate_results]) # Convert back to seconds from nano seconds
estimate = max(1, estimate_results[1].value)
if isinf(estimate) || isnan(estimate)
estimate = 1.0
end
query_type = all_queries[dataset][i].query_type
experiment_results[i] = (estimate, exact_size, estimate_time, query_type, query_path, nv(query.graph))
end
Expand Down
4 changes: 2 additions & 2 deletions Source/QuasiStableCardinalityEstimator.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ function sample_paths(partial_paths::Matrix{Color}, partial_weights::Vector{W},
sample_weights = [get_count(w) for w in new_partial_weights]
sample_weights = AnalyticWeights(sample_weights ./ overall_bounds_sum)
if sampling_strategy == uniform
sample_weights = AnalyticWeights([1.0 for i in eachindex(new_partial_paths)] ./ num_nonzero_entries)
sample_weights = AnalyticWeights([1.0 for i in eachindex(new_partial_weights)] ./ length(new_partial_weights))
end
sample_indices::Vector{Int} = sample(1:length(new_partial_weights), sample_weights, num_samples; replace=false)

Expand All @@ -89,7 +89,7 @@ function sample_paths(partial_paths::Matrix{Color}, partial_weights::Vector{W},
# scale the weights so that their sum equals the sum of the input weights
overall_bounds_sum / sampled_bounds_sum
else
1.0 / sample_weights[i]
1.0 / (sample_weights[i] * num_samples)
end
sampled_partial_weights[i] = scale_coloring(new_partial_weights[idx], inverse_sampling_probability)
end
Expand Down

0 comments on commit 999c880

Please sign in to comment.