Skip to content

Commit

Permalink
Merge pull request #64 from mkyl/alley-and-cleanup
Browse files Browse the repository at this point in the history
LSS and Cleanup
  • Loading branch information
diandremiguels authored Jul 31, 2024
2 parents 684fedb + 74df465 commit 8e69f1a
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 21 deletions.
38 changes: 38 additions & 0 deletions Experiments/Scripts/append_lss.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# The LSS results were established using a different code base. This file takes
# the resulting csv files and appends the information to the end of an overall
# comparison results file to be used for plotting data in figures used in the paper.
import csv
import pandas as pd

lss_datasets = ['aids', 'dblp', 'eu2005', 'human', 'lubm80', 'yeast', 'youtube']

build_inference_filename = 'Experiments/Results/LSS/result/build_and_inference_ST.csv'
comparison_filename = 'Experiments/comparison_results.csv'

estimator = 'lss'

# Read the build/inference timing file ONCE and map dataset -> runtime
# (column index 3 of the matching row). The original code re-read this file
# for every dataset; a single pass with a dict is equivalent and O(n).
# Datasets with no timing row fall back to 0, matching the old behavior.
runtimes = {}
with open(build_inference_filename) as build_file_obj:
    for row in csv.reader(build_file_obj):
        if row and row[0] in lss_datasets:
            runtimes[row[0]] = row[3]

# newline='' is required by the csv module when writing so the writer controls
# line endings (otherwise blank rows appear on Windows). The `with` blocks
# close every file automatically — no explicit .close() calls needed.
with open(comparison_filename, 'a', newline='') as comparison_file_obj:
    writer_obj = csv.writer(comparison_file_obj)
    for dataset in lss_datasets:
        runtime = runtimes.get(dataset, 0)
        results_filename = ('Experiments/Results/LSS/result/' + dataset + '/'
                            + dataset + '_NNGINConcat_freq_80_cv.csv')
        with open(results_filename) as results_file_obj:
            for row in csv.reader(results_file_obj):
                # Skip rows where the LSS estimator reported an error.
                if row[2] == 'error':
                    continue
                query = 'query' + row[1]
                value = row[2]
                # write estimator,dataset,query,value,runtime as a new row
                # in comparison_results.csv
                writer_obj.writerow([estimator, dataset, query, value, runtime])

# Regenerate the parquet copy of the comparison results consumed by the
# plotting scripts.
df = pd.read_csv('Experiments/comparison_results.csv')
df.to_parquet('Experiments/comparison_results.parquet')
18 changes: 9 additions & 9 deletions Experiments/Scripts/comparison_exps.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,14 @@ println("Estimating...")
run_estimation_experiments(experiment_params; timeout=TIMEOUT_SEC)
run_estimation_experiments(max_bounds_experiment_params; timeout=TIMEOUT_SEC)

comparison_methods = ["alley", "alleyTPI", "wj", "impr", "jsub", "cs", "cset", "sumrdf"]
comparison_methods = ["alley", "alleyTPI", "wj", "impr", "jsub", "cs", "cset", "sumrdf", "lss"]
x_order = [string(data) for data in datasets]
bounds_x_order = [string(data) for data in bounds_datasets]
legend_order = [params.description for params in experiment_params][1:Int(length(experiment_params)/ length(datasets))]
max_bounds_legend_order = [params.description for params in max_bounds_experiment_params][1:Int(length(max_bounds_experiment_params)/ length(bounds_datasets))]
legend_order = vcat(legend_order, comparison_methods)

colors = [:red :yellow :maroon3 :palevioletred1 :dodgerblue :coral :palegreen :mediumpurple2 :darkgreen :cadetblue1]
colors = [:red :yellow :maroon3 :palevioletred1 :dodgerblue :coral :palegreen :mediumpurple2 :darkgreen :cadetblue1 :goldenrod]

println("Graphing figures 3 and 4...")

Expand Down Expand Up @@ -131,12 +131,12 @@ graph_grouped_box_plot(max_bounds_experiment_params;
y_label="Inference Latency log\$_{10}\$ (s)",
filename="fig_6") # bounds runtime

comparison_methods = ["alleyTPI", "sumrdf"]
comparison_methods = ["alleyTPI", "sumrdf", "lss"]
x_order = [string(data) for data in datasets]
bar_legend_order = [params.description for params in smaller_experiment_params][1:Int(length(smaller_experiment_params)/ length(datasets))]
bar_legend_order = vcat(bar_legend_order, comparison_methods)
println("bar legend order: ", bar_legend_order)
bar_plot_colors = [:red :palevioletred1 :cadetblue1]
bar_plot_colors = [:red :palevioletred1 :cadetblue1 :goldenrod]

println("Graphing figures 7 and 8")

Expand All @@ -146,9 +146,9 @@ graph_grouped_bar_plot(smaller_experiment_params;
x_order = x_order,
legend_order = bar_legend_order,
ylims=[0, 10],
y_ticks = [1, 2, 3, 4, 5, 6, 7, 8, 9],#[20, 40, 60, 80, 100],
y_ticks = [1, 2, 3, 4, 5, 6, 7, 8],
legend_pos=:topleft,
dimensions = (850, 400),
dimensions = (900, 400),
scale_factor = 1000,
log_scale = true,
group_colors = bar_plot_colors,
Expand All @@ -161,9 +161,9 @@ graph_grouped_bar_plot(smaller_experiment_params;
x_order = x_order,
legend_order = bar_legend_order,
legend_pos=:topleft,
ylims=[0, 10],
y_ticks = [1, 2, 3, 4, 5, 6, 7, 8, 9], #[100, 200, 300, 400, 500, 600, 700, 800],
dimensions = (850, 400),
ylims=[0, 11],
y_ticks = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
dimensions = (900, 400),
scale_factor = 1000,
log_scale = true,
group_colors = bar_plot_colors,
Expand Down
67 changes: 56 additions & 11 deletions Experiments/graph_results.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ function graph_box_plot(experiment_params_list::Vector{ExperimentParams};
# load the results
results_filename = params_to_results_filename(experiment_params)
results_path = "Experiments/Results/Estimation_" * results_filename
# println("results path: ", results_path)
results_df = CSV.read(results_path, DataFrame; normalizenames=true)

# keep track of the data points
Expand Down Expand Up @@ -105,7 +104,6 @@ function graph_grouped_box_plot(experiment_params_list::Vector{ExperimentParams}
# load the results
results_filename = params_to_results_filename(experiment_params)
results_path = "Experiments/Results/Estimation_" * results_filename
# println("results path: ", results_path)
results_df = CSV.read(results_path, DataFrame; normalizenames=true)

# get the x_value and grouping (same for all results in this experiment param)
Expand Down Expand Up @@ -186,24 +184,36 @@ function comparison_dataset()
dataset = comparison_results[i, :Dataset]
query_path = comparison_results[i, :Query]
if dataset == "lubm80"
comparison_results[i, :QueryType] = match(r".*/lubm80_(.*).txt", query_path).captures[1]
if !isnothing(match(r".*/lubm80_(.*).txt", query_path))
comparison_results[i, :QueryType] = match(r".*/lubm80_(.*).txt", query_path).captures[1]
else
comparison_results[i, :QueryType] = "n/a"
end
elseif dataset in ["aids", "human", "yago"]
comparison_results[i, :QueryType] = match(r"(.*)_.*/.*", query_path).captures[1]
if !isnothing(match(r"(.*)_.*/.*", query_path))
comparison_results[i, :QueryType] = match(r"(.*)_.*/.*", query_path).captures[1]
else
comparison_results[i, :QueryType] = "n/a"
end
else
comparison_results[i, :QueryType] = match(r".*/query_(.*)_.*", query_path).captures[1]
if !isnothing(match(r".*/query_(.*)_.*", query_path))
comparison_results[i, :QueryType] = match(r".*/query_(.*)_.*", query_path).captures[1]
else
comparison_results[i, :QueryType] = "n/a"
end
end
end
results_dict = Dict()
for i in 1:nrow(comparison_results)
dataset = comparison_results[i, :Dataset]
estimator = comparison_results[i, :Estimator]
query_path = comparison_results[i, :Query]
query_path = (estimator == "lss") ? "query" * string(i) : comparison_results[i, :Query]
results_dict[(dataset, estimator, query_path)] = (Estimate=comparison_results[i, :Value],
Runtime=comparison_results[i, :Runtime],
QueryType=comparison_results[i,:QueryType])
end
estimators = unique(comparison_results[:, :Estimator])
println(estimators)
println("Estimators: ", estimators)
return estimators, results_dict
end

Expand Down Expand Up @@ -265,12 +275,12 @@ function graph_grouped_boxplot_with_comparison_methods(experiment_params_list::V
current_y = if y_type == estimate_error
min(10^30, max(1, results_df[i, :Estimate])) / results_df[i, :TrueCard]
else # y_type == runtime
results_df[i, :EstimationTime]
typeof(results_df[i, :EstimationTime]) == String ? parse(Float64, results_df[i, :EstimationTime]) : results_df[i, :EstimationTime]
end
true_card[(data, get_query_id(string(experiment_params.dataset), results_df[i, :QueryPath]))] = (results_df[i, :TrueCard], current_x)
# push the errors and their groupings into the correct vector
push!(x_values, string(current_x))
push!(y_values, current_y)
push!(y_values, typeof(current_y) == String ? parse(Float64, current_y) : current_y)
push!(estimators, current_group)
end
end
Expand All @@ -283,6 +293,9 @@ function graph_grouped_boxplot_with_comparison_methods(experiment_params_list::V
card = query_card_and_size[1]
size = query_card_and_size[2]
for estimator in estimator_types
if (estimator == "lss")
continue
end
comp_key = (data, estimator, query_path)
(estimate, runtime) = 1, 60 # TODO: We shouldn't use an arbitrary number for runtime here
if haskey(comparison_results, comp_key)
Expand All @@ -302,15 +315,37 @@ function graph_grouped_boxplot_with_comparison_methods(experiment_params_list::V
current_y = if y_type == estimate_error
min(10^30, max(1, estimate)) / card
else # y_type == runtime
runtime / 1000.0
typeof(runtime) == String ? parse(Float64, runtime) / 1000 : runtime / 1000.0
end

# push the errors and their groupings into the correct vector
push!(x_values, string(current_x))
push!(y_values, current_y)
push!(y_values, typeof(current_y) == String ? parse(Float64, current_y) : current_y)
push!(estimators, estimator)
end
end

# now handle leftover lss data
if ("lss" in estimator_types)
for results_key in keys(comparison_results)
# results_dict[(dataset, estimator, query_path)] = (Estimate=comparison_results[i, :Value], Runtime=comparison_results[i, :Runtime], QueryType=comparison_results[i,:QueryType])
# look for all the rows where the estimator is lss, then push the appropriate x and y values.
if (results_key[2] == "lss")
current_results = comparison_results[results_key]
current_x = results_key[1]
current_y = if y_type == estimate_error
current_results[1]
else
current_results[2]
end
estimator = "lss"
push!(x_values, string(current_x))
push!(y_values, typeof(current_y) == String ? parse(Float64, current_y) : current_y)
push!(estimators, estimator)
end
end
end

if isnothing(x_order)
x_order = sort(unique(x_values))
end
Expand Down Expand Up @@ -409,13 +444,23 @@ function graph_grouped_bar_plot(experiment_params_list::Vector{ExperimentParams}
append!(x_values, ["aids", "human", "lubm80", "dblp", "eu2005", "patents", "yeast", "youtube"])
append!(y_values, [88, 648, 569, 800, 6600, 6900, 6300, 3200])
append!(groups, ["alleyTPI" for _ in 1:8])
append!(x_values, ["aids", "human", "lubm80", "dblp", "eu2005", "yeast", "youtube"])
append!(y_values, [9.023910, 9.067842, 9.018477, 8.981142, 9.010042, 9.045878, 8.992702]) # units of MB
append!(groups, ["lss" for _ in 1:7])

elseif y_type == build_time
append!(x_values, ["aids", "human", "lubm80", "dblp", "eu2005", "patents", "yeast", "youtube"])
append!(y_values, [.3, 4.5, 9.9, .5, 4.2, 8.5, .1, 2.1])
append!(groups, ["sumrdf" for _ in eachindex(y_values)])
append!(x_values, ["aids", "human", "lubm80", "dblp", "eu2005", "patents", "yeast", "youtube"])
append!(y_values, [221, 2518, 17452, 1061, 14233, 11738, 35585, 11044])
append!(groups, ["alleyTPI" for _ in 1:8])
append!(x_values, ["aids", "human", "lubm80", "dblp", "eu2005", "yeast", "youtube"])
# append!(y_values, [1022.6, 29.5023, 3.6737, 3355.36, 492.89, 7047.44, 3130.0165]) # multithreaded results
append!(y_values, [2207.7717, 50.2491, 5.9976, 8105.503, 328.89, 19839.2887, 2309.733]) # single-threaded results
append!(groups, ["lss" for _ in 1:7])


end
for experiment_params in experiment_params_list
# load the results
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ julia> using Pkg;
julia> Pkg.instantiate();
```

3. Download query graphs, data graphs, and true cardinalities from [G-Care](https://github.com/yspark-dblab/gcare) and [In-Memory Subgraph Matching](https://github.com/RapidsAtHKUST/SubgraphMatching)
3. Download query graphs, data graphs, and true cardinalities from [G-Care](https://github.com/yspark-dblab/gcare) and [In-Memory Subgraph Matching](https://github.com/RapidsAtHKUST/SubgraphMatching), also available as [zipped files](https://drive.google.com/drive/folders/1pjJz9ahXFEd3Nd1OxqLA2YNnXGuCVpEp?usp=sharing).

## API

Expand Down

0 comments on commit 8e69f1a

Please sign in to comment.