Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Layered Partitioning Schemes #46

Merged
merged 3 commits into from
Dec 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions Experiments/Scripts/coloring_methods.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@
using Profile
include("../Experiments.jl")

datasets = [human, aids, yeast, hprd, dblp]
partitioners = [QuasiStable, Degree, Hash]
label_refining_rounds = [0, 1, 2, 3, 4]
datasets = [hprd]
partitioning_schemes = [
[(Degree, 64)],
[(NeighborNodeLabels, 64)],
[(QuasiStable, 64)],
[(QuasiStable, 32), (NeighborNodeLabels, 32)],
[(Hash, 64)],
[(Degree, 8), (QuasiStable, 32), (NeighborNodeLabels, 24)],
[(Degree, 8), (NeighborNodeLabels, 24), (QuasiStable, 32)]]

experiment_params = Vector{ExperimentParams}()
for dataset in datasets
for partitioner in partitioners
for refining_round in label_refining_rounds
num_initial_partitions = Int(128/(2 ^ refining_round))
push!(experiment_params, ExperimentParams(dataset=dataset, partitioner=partitioner,
num_colors = num_initial_partitions,
label_refining_rounds=refining_round, sampling_strategy=redistributive))
println(num_initial_partitions)
end
for scheme in partitioning_schemes
push!(experiment_params, ExperimentParams(dataset=dataset, partitioning_scheme=scheme))
end
end

Expand Down
12 changes: 6 additions & 6 deletions Experiments/build_color_summaries.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,37 +52,37 @@ function build_experiments(experiment_params_list::Vector{ExperimentParams})
summary_size = Base.summarysize(current_summary)
serialize(summary_file_location, current_summary)
push!(build_times, (string(dataset),
string(summary_params.partitioner),
string(summary_params.partitioning_scheme),
string(summary_params.num_colors),
"FullTime",
string(results.time),
string(summary_size)))
push!(build_times, (string(dataset),
string(summary_params.partitioner),
string(summary_params.partitioning_scheme),
string(summary_params.num_colors),
"Coloring",
string(timing_vec[1]),
string(summary_size)))
push!(build_times, (string(dataset),
string(summary_params.partitioner),
string(summary_params.partitioning_scheme),
string(summary_params.num_colors),
"CycleCounting",
string(timing_vec[2]),
string(summary_size)))
push!(build_times, (string(dataset),
string(summary_params.partitioner),
string(summary_params.partitioning_scheme),
string(summary_params.num_colors),
"BloomFilter",
string(timing_vec[3]),
string(summary_size)))
push!(build_times, (string(dataset),
string(summary_params.partitioner),
string(summary_params.partitioning_scheme),
string(summary_params.num_colors),
"CardinalityCounting",
string(timing_vec[4]),
string(summary_size)))
push!(build_times, (string(dataset),
string(summary_params.partitioner),
string(summary_params.partitioning_scheme),
string(summary_params.num_colors),
"EdgeStats",
string(timing_vec[5]),
Expand Down
2 changes: 1 addition & 1 deletion Experiments/graph_results.jl
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,6 @@ function get_value_from_param(experiment_param::ExperimentParams, value_type::GR
return experiment_param.summary_params.proportion_deleted
else
# default to grouping by technique
return (experiment_param.summary_params.partitioner, experiment_param.summary_params.label_refining_rounds)
return experiment_param.summary_params.partitioning_scheme
end
end
20 changes: 10 additions & 10 deletions Experiments/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ struct ExperimentParams
use_partial_sums::Bool
sampling_strategy::SAMPLING_STRATEGY

function ExperimentParams(;dataset::DATASET, num_colors::Int=64, max_cycle_size=6,
function ExperimentParams(;dataset::DATASET, max_cycle_size=6,
only_shortest_path_cycle=false, summary_max_paths=1000,
partitioner::PARTITIONER = QuasiStable, weighting=true, inference_max_paths=500, use_partial_sums=true,
sampling_strategy=redistributive, label_refining_rounds = 0, proportion_not_updated=1.0, proportion_deleted=0.0)
return new(dataset, ColorSummaryParams(num_colors=num_colors,
max_cycle_size=max_cycle_size,
max_partial_paths=summary_max_paths,
partitioner=partitioner,
weighting=weighting,
label_refining_rounds=label_refining_rounds,
proportion_not_updated=proportion_not_updated,
partitioning_scheme::Vector{Tuple{PARTITIONER, Int}} = [(QuasiStable, 64)], weighting=true,
inference_max_paths=500, use_partial_sums=true,
sampling_strategy=redistributive, proportion_not_updated=1.0, proportion_deleted=0.0)
return new(dataset,
ColorSummaryParams(max_cycle_size=max_cycle_size,
max_partial_paths=summary_max_paths,
partitioning_scheme=partitioning_scheme,
weighting=weighting,
proportion_not_updated=proportion_not_updated,
proportion_deleted=proportion_deleted),
inference_max_paths,
only_shortest_path_cycle,
Expand Down
16 changes: 7 additions & 9 deletions Source/CardinalityWithColors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,31 +23,29 @@ end
colors::StartEndColorPair
end

@enum PARTITIONER QuasiStable Hash Degree DirectedDegree SimpleLabel InOut LabelInOut NeighborEdges MostNeighbors
@enum PARTITIONER QuasiStable Hash Degree DirectedDegree SimpleLabel InOut LabelInOut NeighborEdges MostNeighbors NeighborNodeLabels


struct ColorSummaryParams
num_colors::Int
max_cycle_size::Int
max_partial_paths::Int
partitioner::PARTITIONER
partitioning_scheme::Vector{Tuple{PARTITIONER, Int}}
weighting::Bool
label_refining_rounds::Int
proportion_not_updated::Float16
proportion_deleted::Float16

function ColorSummaryParams(;num_colors::Int=64, max_cycle_size=4, max_partial_paths=1000,
partitioner::PARTITIONER = QuasiStable, weighting=true, label_refining_rounds=0, proportion_not_updated=1.0, proportion_deleted=0.0)
return new(num_colors, max_cycle_size, max_partial_paths, partitioner, weighting, label_refining_rounds, proportion_not_updated, proportion_deleted)
function ColorSummaryParams(;max_cycle_size=4, max_partial_paths=1000,
partitioning_scheme::Vector{Tuple{PARTITIONER, Int}} = [(QuasiStable, 64)], weighting=true, proportion_not_updated = 1.0, proportion_deleted=0.0)
num_colors = sum([x[2] for x in partitioning_scheme])
return new(num_colors, max_cycle_size, max_partial_paths, partitioning_scheme, weighting, proportion_not_updated, proportion_deleted)
end
end

function params_to_string(params::ColorSummaryParams)
summary_name = "ColorSummary_" * string(params.partitioner) * "_"
summary_name *= string(params.num_colors) * "_"
summary_name = "ColorSummary_" * string(params.partitioning_scheme) * "_"
summary_name *= string(params.max_cycle_size) * "_"
summary_name *= string(params.max_partial_paths)* "_"
summary_name *= string(params.label_refining_rounds)* "_"
summary_name *= string(params.proportion_not_updated) * "_"
summary_name *= string(params.proportion_deleted)
return summary_name
Expand Down
26 changes: 14 additions & 12 deletions Source/ColorSummary.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ mutable struct ColorSummary
max_cycle_size::Int
total_edges::Int
total_nodes::Int
num_colors::Int
total_added_edges::Int
# for outdegrees, c2 is the color of the outneighbor
# for indegrees, c2 is the color of the inneighbor
Expand All @@ -40,7 +41,7 @@ end
# chooses a color for a new node to be added to
function choose_color(summary)
# current implementation: find the biggest color
# other future options:
# other future options:
# - make a brand new color just for added nodes?
# - find the color with the smallest in/out degree? Doesn't work: after repeated updates that color would become badly distorted
return get_largest_color(summary)
Expand Down Expand Up @@ -86,13 +87,13 @@ function add_summary_node!(summary, node_labels, node)
end
end
end

# add to the cardinality counts
for node_label in node_labels
summary.color_label_cardinality[color][node_label] = get(summary.color_label_cardinality[color], node_label, 0) + 1
end

# for cycle stats, since the number of edges/cycles is the same,
# for cycle stats, since the number of edges/cycles is the same,
# cycle likelihood for an arbitrary edge doesn't change
end

Expand Down Expand Up @@ -121,7 +122,7 @@ function delete_summary_node!(summary, node_labels, node)
end
end
end

# subtract from the cardinality counts
for node_label in node_labels
summary.color_label_cardinality[color][node_label] -= 1
Expand Down Expand Up @@ -184,7 +185,7 @@ function update_edge_degrees!(summary, start_node, end_node, edge_labels::Vector
if !haskey(summary.edge_deg[edge_label][vertex_label][start_color], end_color)
summary.edge_deg[edge_label][vertex_label][start_color][end_color] = DegreeStats(0, 0, 0)
end
summary.edge_deg[edge_label][vertex_label][start_color][end_color].avg_out = c1_count == 0 ? 0 :
summary.edge_deg[edge_label][vertex_label][start_color][end_color].avg_out = c1_count == 0 ? 0 :
min(((original_avg_out * c1_count) + probability_end_vertex_label), c1_count * summary.color_label_cardinality[end_color][vertex_label]) / c1_count
# note we don't have to update the color_label_cardinality since no new nodes were added...
end
Expand Down Expand Up @@ -225,8 +226,9 @@ function generate_color_summary(g::DataGraph, params::ColorSummaryParams=ColorSu
coloring_time = time()
color_filters::Dict{Color, SmallCuckoo} = Dict()
color_label_cardinality::Dict{Color, Any} = Dict()
color_hash::Dict{NodeId, Color} = color_graph(g, params, params.num_colors)
color_sizes = [0 for _ in 1:maximum(values(color_hash))]
color_hash::Dict{NodeId, Color} = color_graph(g, params)
num_colors = maximum(values(color_hash))
color_sizes = [0 for _ in 1:num_colors]
for c in values(color_hash)
color_sizes[c] += 1
end
Expand Down Expand Up @@ -443,7 +445,7 @@ function generate_color_summary(g::DataGraph, params::ColorSummaryParams=ColorSu

return ColorSummary(color_label_cardinality, edge_deg, color_filters,
cycle_probabilities, cycle_length_probabilities, params.max_cycle_size,
ne(g.graph), nv(g.graph), 0)
ne(g.graph), nv(g.graph), num_colors, 0)
end

function color_hash_to_groups(color_hash, num_colors)
Expand Down Expand Up @@ -582,24 +584,24 @@ function join_table_cycle_likelihoods(g::DataGraph, color_hash, cycle_size::Int,
push!(detailed_edges[detailed_edge[1]], detailed_edge)
push!(detailed_edges[detailed_reverse_edge[1]], detailed_reverse_edge)
end

# create tables for each size of cycle/path
stored_cycles::Dict{CyclePathAndColors, Float32} = Dict() # this stores summary info representing the path lengths we want to close
stored_paths::Dict{CyclePathAndColors, Float32} = Dict() # this stores summary info representing the path lengths that actually closed
# summary info = [c1, c2, [d]]

# initialize with size two data
# start up the "current joins" vector
updated_paths::Dict{Tuple{Int, Int, Int, Int, Vector{Bool}}, Float64} = Dict() # stores our progress as we repeatedly join
updated_paths::Dict{Tuple{Int, Int, Int, Int, Vector{Bool}}, Float64} = Dict() # stores our progress as we repeatedly join
for edge_set in values(detailed_edges)
for edge in edge_set
summary_info = CyclePathAndColors(edge[5], (edge[3], edge[4]))
updated_paths[edge] = 1.0
stored_paths[summary_info] = 1.0
stored_cycles[summary_info] = (edge[1], edge[2], color_hash[edge[1]], color_hash[edge[2]], [false]) in detailed_edges[edge[1]] ?
stored_cycles[summary_info] = (edge[1], edge[2], color_hash[edge[1]], color_hash[edge[2]], [false]) in detailed_edges[edge[1]] ?
1.0 : 0.0
end
end
end

# for each cycle size...
for current_cycle_size in 3: cycle_size
Expand Down
Loading