Skip to content

Commit

Permalink
Merge pull request #44 from mkyl/experiment-cleanup
Browse files Browse the repository at this point in the history
Experiment cleanup
  • Loading branch information
diandremiguels authored Nov 13, 2023
2 parents da93ee7 + 4f70262 commit e3d03d5
Show file tree
Hide file tree
Showing 16 changed files with 103 additions and 310 deletions.
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
using Plots.PlotMeasures
include("Experiments/Experiments.jl")
include("../Experiments.jl")

datasets::Vector{DATASET} = [aids]
# datasets::Vector{DATASET} = [aids, human, yeast, wordnet, youtube, dblp, patents]
# datasets::Vector{DATASET} = [aids, human, lubm80, yeast, hprd, wordnet, dblp, youtube, eu2005, patents]
datasets::Vector{DATASET} = [aids, human, lubm80, yeast, hprd, wordnet, dblp, youtube, eu2005, patents]
max_cycles = 6

experiment_params_list::Vector{ExperimentParams} = [ExperimentParams(dataset=current_dataset, partitioner=QuasiStable, max_cycle_size=current_size) for current_dataset in datasets for current_size in 2:max_cycles]
Expand All @@ -16,10 +14,7 @@ for experiment_params in experiment_params_list
summary_name = params_to_summary_filename(experiment_params)
summary_file_location = "Experiments/SerializedSummaries/" * summary_name
println("Building Color Summary: ", summary_name)
# normal_results = @timed generate_color_summary(data, summary_params; verbose=1)
# normal_results = @timed generate_color_summary(data, summary_params; verbose=0, detailed_cycles=false)
results= @timed generate_color_summary(data, summary_params; verbose=1, detailed_cycles=false)
# println("normal time: ", normal_results.time)
results= @timed generate_color_summary(data, summary_params; verbose=1, use_cycle_join_table=false)
println("detailed time: ", results.time)
summary_size = Base.summarysize(results.value)
serialize(summary_file_location, results.value)
Expand All @@ -35,4 +30,4 @@ end
println("started estimating")
run_estimation_experiments(experiment_params_list)
println("started graphing")
graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=estimate_error, grouping=cycle_size, filename="detailed-sample-experiment")
graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=estimate_error, grouping=cycle_size, filename="cycles-without-join-table-cycle-stats-experiment")
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
using Plots.PlotMeasures
include("Experiments/Experiments.jl")
include("../Experiments.jl")

datasets::Vector{DATASET} = [aids, human, yeast, wordnet, youtube, dblp, patents]
# datasets::Vector{DATASET} = [aids, human, lubm80, yeast, hprd, wordnet, dblp, youtube, eu2005, patents]
datasets::Vector{DATASET} = [aids, human, lubm80, yeast, hprd, wordnet, dblp, youtube, eu2005, patents]
max_cycles = 6

experiment_params_list::Vector{ExperimentParams} = [ExperimentParams(dataset=current_dataset, partitioner=QuasiStable, max_cycle_size=current_size) for current_dataset in datasets for current_size in 2:max_cycles]

println("started building")
build_experiments(experiment_params_list)
println("started estimating")
Expand Down
13 changes: 13 additions & 0 deletions Experiments/Scripts/run-inference-sampling-experiments.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
using Plots.PlotMeasures
include("../Experiments.jl")

# Inference-sampling experiment.
# For every dataset below, one ExperimentParams is created per value of
# `inference_max_paths` in 2:10:max_paths (2, 12, 22, 32, 42, 52). The list is
# then handed, in order, to build_experiments, run_estimation_experiments and
# graph_grouped_box_plot.
datasets::Vector{DATASET} = [aids, human, yeast, wordnet, youtube, dblp, patents]
max_paths = 60
experiment_params_list::Vector{ExperimentParams} = [
    ExperimentParams(
        dataset=ds,
        partitioner=QuasiStable,
        inference_max_paths=path_limit,
    ) for ds in datasets for path_limit in 2:10:max_paths
]

# Stage 1: build step for every parameter combination.
println("started building")
build_experiments(experiment_params_list)

# Stage 2: estimation step over the same parameter list.
println("started estimating")
run_estimation_experiments(experiment_params_list)

# Stage 3: grouped box plot of estimate error per dataset, grouped by the
# inference path setting.
println("started graphing")
graph_grouped_box_plot(
    experiment_params_list,
    x_type=dataset,
    y_type=estimate_error,
    grouping=inference_paths,
    filename="inferencesampling",
)
14 changes: 14 additions & 0 deletions Experiments/Scripts/run-summary-sampling-experiments.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using Plots.PlotMeasures
include("../Experiments.jl")


# Summary-sampling experiment.
# For every dataset below, one ExperimentParams is created per value of
# `summary_max_paths` in 0:200:max_paths (0, 200, 400, 600, 800, 1000). The
# list is then handed, in order, to build_experiments,
# run_estimation_experiments and graph_grouped_box_plot.
datasets::Vector{DATASET} = [aids, wordnet, lubm80, human]
max_paths = 1000
experiment_params_list::Vector{ExperimentParams} = [
    ExperimentParams(
        dataset=current_dataset,
        partitioner=QuasiStable,
        summary_max_paths=current_paths,
    ) for current_dataset in datasets for current_paths in 0:200:max_paths
]

println("started building")
build_experiments(experiment_params_list)
println("started estimating")
run_estimation_experiments(experiment_params_list)
println("started graphing")
# y_type must be a member of the VALUE enum (estimate_error, runtime,
# build_time, memory_footprint). The bare name `error` is not one of them: it
# resolves to the `Base.error` function, so `estimate_error` is passed instead,
# matching the other experiment scripts.
graph_grouped_box_plot(
    experiment_params_list,
    x_type=dataset,
    y_type=estimate_error,
    grouping=summary_paths,
    filename="summarysamples",
)
13 changes: 13 additions & 0 deletions Experiments/Scripts/run-summary-sampling-with-query-type.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
using Plots.PlotMeasures
include("../Experiments.jl")

# Summary-sampling experiment on a single dataset, broken down by query type.
# One ExperimentParams is created per value of `summary_max_paths` in
# 10:30:max_paths (10, 40, ..., 280), all for `current_dataset`.
current_dataset = yeast
max_paths = 300

experiment_params_list::Vector{ExperimentParams} = [
    ExperimentParams(
        dataset=current_dataset,
        partitioner=QuasiStable,
        summary_max_paths=path_budget,
    ) for path_budget in 10:30:max_paths
]

# Stage 1: build step for every parameter combination.
println("started building")
build_experiments(experiment_params_list)

# Stage 2: estimation step over the same parameter list.
println("started estimating")
run_estimation_experiments(experiment_params_list)

# Stage 3: grouped box plot of estimate error per query type, grouped by the
# summary path setting.
println("started graphing")
graph_grouped_box_plot(
    experiment_params_list,
    x_type=query_type,
    y_type=estimate_error,
    grouping=summary_paths,
    filename="summarysamplesquerytypesyeast",
)
21 changes: 21 additions & 0 deletions Experiments/Scripts/update_experiments.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
using Plots.PlotMeasures
using Graphs
include("../Experiments.jl")

# Summary-update experiment.
# One ExperimentParams is created for every combination of dataset,
# `max_cycle_size` in 2:max_cycles and `proportion_not_updated` value listed
# below. The list is then handed, in order, to build_experiments,
# run_estimation_experiments and graph_grouped_box_plot.

# Alternative dataset selections, kept for convenience:
# datasets::Vector{DATASET} = [aids, human, yeast, wordnet, youtube, dblp, patents]
# datasets::Vector{DATASET} = [aids, human, lubm80, yeast, hprd, wordnet, dblp, youtube, eu2005, patents]
datasets::Vector{DATASET} = [wordnet]
max_cycles = 6
proportions_not_updated = [0, 0.2, 0.4, 0.6, 0.8, 1]

experiment_params_list::Vector{ExperimentParams} = [
    ExperimentParams(
        dataset=ds,
        partitioner=QuasiStable,
        max_cycle_size=cycle_len,
        proportion_not_updated=kept_fraction,
    ) for ds in datasets for cycle_len in 2:max_cycles
    for kept_fraction in proportions_not_updated
]

# Stage 1: build step for every parameter combination.
println("started building")
build_experiments(experiment_params_list)

# Stage 2: estimation step over the same parameter list.
println("started estimating")
run_estimation_experiments(experiment_params_list)

println("started graphing")
# Alternative plot: how overall accuracy is affected by summary updates.
# graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=estimate_error, grouping=proportion_not_updated, filename="overall-accuracy-and-updates")
# Active plot: how cycle stat accuracies are affected by summary updates.
graph_grouped_box_plot(
    experiment_params_list,
    x_type=proportion_not_updated,
    y_type=estimate_error,
    grouping=cycle_size,
    filename="cycle-stats-and-updates",
)
27 changes: 24 additions & 3 deletions Experiments/build_color_summaries.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,34 @@ function build_experiments(experiment_params_list::Vector{ExperimentParams})
dataset = experiment_params.dataset
summary_params = experiment_params.summary_params
data = load_dataset(dataset)
cloned_data = DataGraph(nv(data.graph))
remaining_edges = []
if (experiment_params.summary_params.proportion_not_updated < 1.0)
cloned_data.vertex_labels = data.vertex_labels
graph_edges = collect(edges(data.graph))
# edges_to_add = (length(graph_edges) * experiment_params.summary_params.proportion_not_updated)
for edge in graph_edges
if (rand() < experiment_params.summary_params.proportion_not_updated)
add_labeled_edge!(cloned_data, (src(edge), dst(edge)), only(data.edge_labels[(src(edge), dst(edge))]))
# edges_to_add -= 1
else
push!(remaining_edges, edge)
end
end
end
summary_name = params_to_summary_filename(experiment_params)
summary_file_location = "Experiments/SerializedSummaries/" * summary_name
println("Building Color Summary: ", summary_name)
timing_vec = Float64[]
results = @timed generate_color_summary(data, summary_params; verbose=1, timing_vec=timing_vec)
summary_size = Base.summarysize(results.value)
serialize(summary_file_location, results.value)
results = @timed generate_color_summary((experiment_params.summary_params.proportion_not_updated < 1.0) ? cloned_data : data, summary_params; verbose=1, timing_vec=timing_vec)
current_summary = results.value
if (experiment_params.summary_params.proportion_not_updated < 1.0)
for edge in remaining_edges
add_summary_edge!(current_summary, src(edge), dst(edge), get(data.edge_labels, (src(edge), dst(edge)), []))
end
end
summary_size = Base.summarysize(current_summary)
serialize(summary_file_location, current_summary)
push!(build_times, (string(dataset),
string(summary_params.partitioner),
string(summary_params.num_colors),
Expand Down
5 changes: 3 additions & 2 deletions Experiments/graph_results.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
@enum GROUP dataset technique cycle_size summary_paths inference_paths query_type sampling_type cycle_stats number_of_colors build_phase
#todo: query type
@enum GROUP dataset technique cycle_size summary_paths inference_paths query_type sampling_type cycle_stats number_of_colors build_phase proportion_not_updated

@enum VALUE estimate_error runtime build_time memory_footprint

Expand Down Expand Up @@ -244,6 +243,8 @@ function get_value_from_param(experiment_param::ExperimentParams, value_type::GR
return experiment_param.only_shortest_path_cycle
elseif value_type == number_of_colors
return experiment_param.summary_params.num_colors
elseif value_type == proportion_not_updated
return experiment_param.summary_params.proportion_not_updated
else
# default to grouping by technique
return (experiment_param.summary_params.partitioner, experiment_param.summary_params.label_refining_rounds)
Expand Down
5 changes: 3 additions & 2 deletions Experiments/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@ struct ExperimentParams
function ExperimentParams(;dataset::DATASET, num_colors::Int=64, max_cycle_size=6,
only_shortest_path_cycle=false, summary_max_paths=1000,
partitioner::PARTITIONER = QuasiStable, weighting=true, inference_max_paths=500, use_partial_sums=true,
sampling_strategy=redistributive, label_refining_rounds = 0)
sampling_strategy=redistributive, label_refining_rounds = 0, proportion_not_updated=1.0)
return new(dataset, ColorSummaryParams(num_colors=num_colors,
max_cycle_size=max_cycle_size,
max_partial_paths=summary_max_paths,
partitioner=partitioner,
weighting=weighting,
label_refining_rounds=label_refining_rounds),
label_refining_rounds=label_refining_rounds,
proportion_not_updated=proportion_not_updated),
inference_max_paths,
only_shortest_path_cycle,
use_partial_sums,
Expand Down
8 changes: 5 additions & 3 deletions Source/CardinalityWithColors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,11 @@ struct ColorSummaryParams
partitioner::PARTITIONER
weighting::Bool
label_refining_rounds::Int
proportion_not_updated::Float16

function ColorSummaryParams(;num_colors::Int=64, max_cycle_size=4, max_partial_paths=1000,
partitioner::PARTITIONER = QuasiStable, weighting=true, label_refining_rounds = 0)
return new(num_colors, max_cycle_size, max_partial_paths, partitioner, weighting, label_refining_rounds)
partitioner::PARTITIONER = QuasiStable, weighting=true, label_refining_rounds = 0, proportion_not_updated = 1.0)
return new(num_colors, max_cycle_size, max_partial_paths, partitioner, weighting, label_refining_rounds, proportion_not_updated)
end
end

Expand All @@ -45,7 +46,8 @@ function params_to_string(params::ColorSummaryParams)
summary_name *= string(params.num_colors) * "_"
summary_name *= string(params.max_cycle_size) * "_"
summary_name *= string(params.max_partial_paths)* "_"
summary_name *= string(params.label_refining_rounds)
summary_name *= string(params.label_refining_rounds)* "_"
summary_name *= string(params.proportion_not_updated)
return summary_name
end

Expand Down
37 changes: 0 additions & 37 deletions run-cycle-experiments-timed.jl

This file was deleted.

33 changes: 0 additions & 33 deletions run-inference-sampling-experiments.jl

This file was deleted.

24 changes: 0 additions & 24 deletions run-summary-sampling-experiments-full-range.jl

This file was deleted.

30 changes: 0 additions & 30 deletions run-summary-sampling-experiments.jl

This file was deleted.

Loading

0 comments on commit e3d03d5

Please sign in to comment.