From 7b5785fb9a231034d16e6f9485eb941146b3c468 Mon Sep 17 00:00:00 2001
From: kdeweese
Date: Wed, 14 May 2025 16:38:26 -0700
Subject: [PATCH 1/2] add afforest to CC

---
Reviewer notes (text between the '---' line and the diff is ignored by git-am):

 * The copy of this series that was reviewed had lost its line breaks, its
   indentation and every '<...>' span (template parameter lists, #include
   targets, the author address). Those parts are reconstructed below and
   must be checked against the real tree before applying -- in particular
   the context lines, the whitespace, and the two added #include targets.
   The 'index' hashes are the ones from the original series and do not
   describe the post-image of this revised patch.
 * afforest(): the dense renumbering stopped handing out new ids as soon as
   its map held one entry, so later roots kept their raw label and the
   returned count was too small (labels 0,0,2,2,4,4 gave 2). Replaced by a
   shared helper that numbers labels in order of first appearance.
 * afforest(g, g_t, ...): edges(g_t, u) was called with a vertex of g; the
   vertex is now looked up in g_t by id.
 * sample_frequent_element(): guarded against an empty range and against
   num_samples == 0.
 * link(): dropped a branch that could never be taken.
 * tests: removed an unused transposed graph from the first test and added
   a regression test for the renumbering.

 .../graph/algorithm/connected_components.hpp  | 236 ++++++++++++++++++
 tests/cc_tests.cpp                            |  77 +++++-
 2 files changed, 311 insertions(+), 2 deletions(-)

diff --git a/include/graph/algorithm/connected_components.hpp b/include/graph/algorithm/connected_components.hpp
index a972cb0..677e4e8 100644
--- a/include/graph/algorithm/connected_components.hpp
+++ b/include/graph/algorithm/connected_components.hpp
@@ -20,6 +20,8 @@
 #include "graph/views/depth_first_search.hpp"
 #include "graph/views/breadth_first_search.hpp"
 #include <stack>
+#include <random>
+#include <iostream>
 
 #ifndef GRAPH_CC_HPP
 #  define GRAPH_CC_HPP
@@ -117,6 +119,240 @@ size_t connected_components(G&& g, // graph
   return cid;
 }
 
+/**
+ * @brief Merges the trees containing @p u and @p v in the parent-pointer forest stored in @p component.
+ *
+ * Both parent chains are climbed in lock-step. As soon as the larger of the two current labels turns out to be a
+ * root it is re-parented under the smaller one, so a root is only ever hooked below a smaller label.
+ *
+ * @tparam vertex_id_t Type of vertex ids and labels.
+ * @tparam Component   Random-access range indexed by vertex id.
+ *
+ * @param u         First endpoint.
+ * @param v         Second endpoint.
+ * @param component In/out parent array: component[x] is the parent of x, component[x] == x marks a root.
+ */
+template<typename vertex_id_t, random_access_range Component>
+static void link(vertex_id_t u, vertex_id_t v, Component& component)
+{
+  vertex_id_t p1 = component[u];
+  vertex_id_t p2 = component[v];
+
+  while (p1 != p2) {
+    const vertex_id_t high   = std::max(p1, p2);
+    const vertex_id_t low    = std::min(p1, p2);
+    const vertex_id_t p_high = component[high];
+    if (p_high == low)
+      break; // high already hangs directly below low
+    if (p_high == high) {
+      component[high] = low; // high is a root: hook it below the smaller label
+      break;
+    }
+    p1 = component[p_high];
+    p2 = component[low];
+  }
+}
+
+/**
+ * @brief Shortens parent chains by replacing every entry of @p component with its grandparent.
+ *
+ * One forward sweep is made. Provided no parent index is larger than the index of its child (link() only ever
+ * hooks a root below a smaller label), every entry points directly at its root afterwards.
+ *
+ * @param component In/out parent array indexed by vertex id.
+ */
+template<random_access_range Component>
+static void compress(Component& component)
+{
+  for (size_t i = 0; i < component.size(); ++i) {
+    const auto parent = component[i];
+    if (parent != component[parent]) {
+      component[i] = component[parent];
+    }
+  }
+}
+
+/**
+ * @brief Estimates the most common label in @p component from a fixed number of random probes.
+ *
+ * The generator is default-constructed, so it starts from the same seed on every call.
+ *
+ * @tparam vertex_id_t Type of the returned label.
+ * @tparam Component   Random-access range of labels.
+ *
+ * @param component   Labels to sample from.
+ * @param num_samples Number of probes to draw, with replacement.
+ *
+ * @return The label drawn most often, or a value-initialized label if @p component is empty or
+ *         @p num_samples is zero.
+ */
+template<typename vertex_id_t, random_access_range Component>
+static vertex_id_t sample_frequent_element(Component& component, size_t num_samples = 1024)
+{
+  // Nothing to sample: size() - 1 would wrap around and max_element would have no element to return.
+  if (component.size() == 0 || num_samples == 0) {
+    return vertex_id_t{};
+  }
+
+  std::unordered_map<vertex_id_t, size_t> counts(32);
+  std::mt19937                            gen;
+  std::uniform_int_distribution<size_t>   distribution(0, component.size() - 1);
+
+  for (size_t i = 0; i < num_samples; ++i) {
+    ++counts[component[distribution(gen)]];
+  }
+
+  const auto most_frequent = std::max_element(counts.begin(), counts.end(),
+                                              [](const auto& a, const auto& b) { return a.second < b.second; });
+  return most_frequent->first;
+}
+
+/**
+ * @brief Renumbers the labels in @p component to 0, 1, 2, ... in order of first appearance.
+ *
+ * @param component    In/out labels; each distinct input label is replaced by its dense id.
+ * @param num_vertices Number of leading entries of @p component to renumber.
+ *
+ * @return Number of distinct labels among the renumbered entries.
+ */
+template<random_access_range Component>
+static size_t afforest_compact_labels(Component& component, const size_t num_vertices)
+{
+  using label_t = range_value_t<Component>;
+
+  std::unordered_map<label_t, label_t> dense_id; // original label -> dense id
+  for (size_t v = 0; v < num_vertices; ++v) {
+    // try_emplace inserts only on a label's first appearance, so ids are handed out consecutively.
+    const auto entry = dense_id.try_emplace(component[v], static_cast<label_t>(dense_id.size())).first;
+    component[v]     = entry->second;
+  }
+  return dense_id.size();
+}
+
+/**
+ * @brief Neighbor-sampling phase of afforest: links every vertex with its first @p neighbor_rounds out-neighbors.
+ *
+ * @param g               Graph whose out-edges are sampled.
+ * @param component       In/out parent array indexed by vertex id.
+ * @param neighbor_rounds Number of leading out-edges handled per vertex; the forest is compressed after each round.
+ */
+template<index_adjacency_list G, random_access_range Component>
+static void afforest_link_sampled_neighbors(G&& g, Component& component, const size_t neighbor_rounds)
+{
+  for (size_t r = 0; r < neighbor_rounds; ++r) {
+    for (auto&& [uid, u] : views::vertexlist(g)) {
+      auto&& out_edges = edges(g, u);
+      if (r < size(out_edges)) {
+        auto it = out_edges.begin();
+        std::advance(it, r);
+        link(uid, target_id(g, *it), component);
+      }
+    }
+    compress(component);
+  }
+}
+
+/**
+ * @brief Labels the connected components of @p g with the Afforest algorithm.
+ *
+ * Three steps: (1) every vertex is linked with its first @p neighbor_rounds out-neighbors; (2) the most frequent
+ * label is estimated by sampling and every vertex not carrying it is linked with its remaining out-neighbors;
+ * (3) the labels are renumbered to 0 .. k-1.
+ *
+ * Only out-edges are followed and vertices carrying the sampled label are skipped in step (2), so an edge is only
+ * certain to be processed if it is stored in both directions. For directed input use the overload that also takes
+ * the transpose.
+ *
+ * @param g               Graph.
+ * @param component       Out: component[uid] is the component id of vertex uid. Needs one entry per vertex of @p g.
+ * @param neighbor_rounds Number of out-edges per vertex handled in step (1).
+ *
+ * @return Number of components found.
+ */
+template<index_adjacency_list G, random_access_range Component>
+requires random_access_range<vertex_range_t<G>> && integral<vertex_id_t<G>> &&
+         std::convertible_to<range_value_t<Component>, vertex_id_t<G>> &&
+         std::convertible_to<vertex_id_t<G>, range_value_t<Component>>
+size_t afforest(G&&          g,         // graph
+                Component&   component, // out: connected component assignment
+                const size_t neighbor_rounds = 2)
+{
+  const size_t N(size(vertices(g)));
+  std::iota(component.begin(), component.end(), range_value_t<Component>{});
+
+  afforest_link_sampled_neighbors(g, component, neighbor_rounds);
+
+  const vertex_id_t<G> c = sample_frequent_element<vertex_id_t<G>>(component);
+
+  for (auto&& [uid, u] : views::vertexlist(g)) {
+    if (component[uid] == c) {
+      continue;
+    }
+    auto&& out_edges = edges(g, u);
+    if (neighbor_rounds < size(out_edges)) {
+      auto it = out_edges.begin();
+      std::advance(it, neighbor_rounds);
+      for (; it != out_edges.end(); ++it) {
+        link(uid, target_id(g, *it), component);
+      }
+    }
+  }
+
+  compress(component);
+  return afforest_compact_labels(component, N);
+}
+
+/**
+ * @brief Labels the weakly connected components of @p g with the Afforest algorithm.
+ *
+ * Same steps as the single-graph overload, except that in step (2) every vertex not carrying the sampled label is
+ * additionally linked with all of its neighbors in @p g_t, so that in-edges of @p g are taken into account.
+ *
+ * @param g               Graph.
+ * @param g_t             Transpose of @p g; vertex ids are looked up in it unchanged.
+ * @param component       Out: component[uid] is the component id of vertex uid. Needs one entry per vertex of @p g.
+ * @param neighbor_rounds Number of out-edges per vertex handled in step (1).
+ *
+ * @return Number of components found.
+ */
+template<index_adjacency_list G, adjacency_list GT, random_access_range Component>
+requires random_access_range<vertex_range_t<G>> && integral<vertex_id_t<G>> &&
+         std::convertible_to<range_value_t<Component>, vertex_id_t<G>> &&
+         std::convertible_to<vertex_id_t<G>, range_value_t<Component>>
+size_t afforest(G&&          g,         // graph
+                GT&&         g_t,       // graph transpose
+                Component&   component, // out: connected component assignment
+                const size_t neighbor_rounds = 2)
+{
+  const size_t N(size(vertices(g)));
+  std::iota(component.begin(), component.end(), range_value_t<Component>{});
+
+  afforest_link_sampled_neighbors(g, component, neighbor_rounds);
+
+  const vertex_id_t<G> c = sample_frequent_element<vertex_id_t<G>>(component);
+
+  for (auto&& [uid, u] : views::vertexlist(g)) {
+    if (component[uid] == c) {
+      continue;
+    }
+    auto&& out_edges = edges(g, u);
+    if (neighbor_rounds < size(out_edges)) {
+      auto it = out_edges.begin();
+      std::advance(it, neighbor_rounds);
+      for (; it != out_edges.end(); ++it) {
+        link(uid, target_id(g, *it), component);
+      }
+    }
+    // u refers to a vertex of g, so the matching vertex of g_t is looked up by id.
+    for (auto&& in_edge : edges(g_t, uid)) {
+      link(uid, target_id(g_t, in_edge), component);
+    }
+  }
+
+  compress(component);
+  return afforest_compact_labels(component, N);
+}
+
 } // namespace graph
 
 #endif //GRAPH_CC_HPP
diff --git a/tests/cc_tests.cpp b/tests/cc_tests.cpp
index 4f5ab80..4cdf8cb 100644
--- a/tests/cc_tests.cpp
+++ b/tests/cc_tests.cpp
@@ -69,6 +69,80 @@ TEST_CASE("strongly connected components test", "[strong cc]") {
 
   REQUIRE(*std::ranges::max_element(component) == 2);
 }
+
+TEST_CASE("afforest test", "[afforest cc]") {
+  init_console();
+
+  using G  = routes_vol_graph_type;
+  auto&& g = load_ordered_graph<G>(TEST_DATA_ROOT_DIR "cc_undirected.csv", name_order_policy::alphabetical);
+
+  std::vector<vertex_id_t<G>> component(size(vertices(g)));
+  auto                        components = graph::afforest(g, component);
+  REQUIRE(components == 3);
+  REQUIRE(*std::ranges::max_element(component) == 2);
+}
+
+TEST_CASE("afforest relabel test", "[afforest cc]") {
+  init_console();
+
+  using G = routes_vol_graph_type;
+
+  // Three disjoint edges 0-1, 2-3 and 4-5, each stored in both directions. The roots are 0, 2 and 4, so two of
+  // the three labels have to be renumbered.
+  std::vector<std::tuple<vertex_id_t<G>, vertex_id_t<G>, double>> pairs;
+  for (vertex_id_t<G> a = 0; a < 6; a += 2) {
+    pairs.emplace_back(a, a + 1, 1.0);
+    pairs.emplace_back(a + 1, a, 1.0);
+  }
+
+  using value     = std::ranges::range_value_t<decltype(pairs)>;
+  using edge_desc = graph::edge_info<vertex_id_t<G>, true, void, double>;
+  auto edge_proj  = [](const value& val) -> edge_desc {
+    return edge_desc{std::get<0>(val), std::get<1>(val), std::get<2>(val)};
+  };
+
+  G g;
+  g.load_edges(pairs, edge_proj, vertex_id_t<G>{6});
+
+  std::vector<vertex_id_t<G>>       component(size(vertices(g)));
+  const std::vector<vertex_id_t<G>> expected{0, 0, 1, 1, 2, 2};
+  auto                              components = graph::afforest(g, component);
+  REQUIRE(components == 3);
+  REQUIRE(component == expected);
+}
+
+TEST_CASE("afforest test weak", "[afforest weak_cc]") {
+  init_console();
+
+  using G  = routes_vol_graph_type;
+  auto&& g = load_ordered_graph<G>(TEST_DATA_ROOT_DIR "cc_directed.csv", name_order_policy::alphabetical);
+
+  // Transpose of g: one (target, source, value) triple per edge.
+  std::vector<std::tuple<vertex_id_t<G>, vertex_id_t<G>, double>> reverse;
+  vertex_id_t<G>                                                  vid = 0;
+  for (auto&& u : vertices(g)) {
+    for (auto&& v : edges(g, u)) {
+      reverse.emplace_back(target_id(g, v), vid, edge_value(g, v));
+    }
+    ++vid;
+  }
+
+  using value = std::ranges::range_value_t<decltype(reverse)>;
+
+  vertex_id_t<G> N = static_cast<vertex_id_t<G>>(size(vertices(g)));
+  using edge_desc  = graph::edge_info<vertex_id_t<G>, true, void, double>;
+  auto edge_proj   = [](const value& val) -> edge_desc {
+    return edge_desc{std::get<0>(val), std::get<1>(val), std::get<2>(val)};
+  };
+
+  G gt;
+  gt.load_edges(reverse, edge_proj, N);
+
+  std::vector<vertex_id_t<G>> component(size(vertices(g)));
+  auto                        components = graph::afforest(g, gt, component);
+  REQUIRE(components == 1);
+  REQUIRE(*std::ranges::max_element(component) == 0);
+}
 #endif
 
 TEST_CASE("connected components test", "[cc]") {
@@ -79,6 +153,5 @@ TEST_CASE("connected components test", "[cc]") {
 
   std::vector<size_t> component(size(vertices(g)));
   graph::connected_components(g, component);
-
   REQUIRE(*std::ranges::max_element(component) == 2);
-}
\ No newline at end of file
+}
From 7be1f909e0dd5c6b3587036fea3de3227e62718c Mon Sep 17 00:00:00 2001
From: kdeweese
Date: Wed, 14 May 2025 16:42:08 -0700
Subject: [PATCH 2/2] remove unneeded header

---
 include/graph/algorithm/connected_components.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/graph/algorithm/connected_components.hpp b/include/graph/algorithm/connected_components.hpp
index 677e4e8..8e02fab 100644
--- a/include/graph/algorithm/connected_components.hpp
+++ b/include/graph/algorithm/connected_components.hpp
@@ -21,7 +21,6 @@
 #include "graph/views/breadth_first_search.hpp"
 #include <stack>
 #include <random>
-#include <iostream>
 
 #ifndef GRAPH_CC_HPP
 #  define GRAPH_CC_HPP