Skip to content

Commit 65fca66

Browse files
committed
Add a few regression tests for #3
1 parent 3dfc639 commit 65fca66

File tree

3 files changed

+271
-5
lines changed

3 files changed

+271
-5
lines changed

onager/src/algorithms/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ pub mod subgraphs;
1919
pub mod traversal;
2020

2121
#[cfg(test)]
22-
mod large_graph_tests;
22+
mod regression_tests;
2323

2424
// Re-export all public items for backward compatibility
2525
pub use approximation::*;

onager/src/algorithms/large_graph_tests.rs renamed to onager/src/algorithms/regression_tests.rs

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
//! Large graph tests to verify algorithms work on graphs with 10k+ nodes.
2-
//!
3-
//! These tests specifically target the 12,288 node boundary where
4-
//! DuckDB's STANDARD_VECTOR_SIZE (2048) * 6 = 12288 chunks align.
1+
//! Regression tests for GitHub issue #3.
52
63
#[cfg(test)]
74
mod tests {
@@ -123,4 +120,41 @@ mod tests {
123120
let pr = result.unwrap();
124121
assert_eq!(pr.node_ids.len(), 12_289);
125122
}
123+
124+
/// Regression test for GitHub issue #3: connected components must succeed on
/// the input produced by `generate_graph_edges(50_000)` and report exactly
/// 50,000 node ids.
#[test]
fn test_connected_components_50k_nodes() {
    let (src, dst) = generate_graph_edges(50_000);
    // `expect` keeps the regression message and, unlike a bare
    // `assert!(result.is_ok())`, also prints the underlying error on failure.
    let cc = compute_connected_components(&src, &dst)
        .expect("Connected components should succeed on 50k nodes (GitHub #3 regression test)");
    assert_eq!(cc.node_ids.len(), 50_000, "Should return all 50k nodes");
}
135+
136+
/// Regression test for GitHub issue #3: PageRank must succeed on the input
/// produced by `generate_graph_edges(50_000)` and report exactly 50,000 node
/// ids.
#[test]
fn test_pagerank_50k_nodes() {
    let (src, dst) = generate_graph_edges(50_000);
    // Use fewer iterations for faster test execution.
    // NOTE(review): presumably `0.85` is the damping factor and `10` the
    // iteration count — confirm against `compute_pagerank`'s signature.
    // `expect` keeps the regression message and also prints the underlying
    // error on failure instead of discarding it.
    let pr = compute_pagerank(&src, &dst, &[], 0.85, 10, true)
        .expect("PageRank should succeed on 50k nodes (GitHub #3 regression test)");
    assert_eq!(pr.node_ids.len(), 50_000, "Should return all 50k nodes");
}
148+
149+
/// Regression test for GitHub issue #3: Louvain must succeed on the input
/// produced by `generate_graph_edges(50_000)` and report exactly 50,000 node
/// ids.
#[test]
fn test_louvain_50k_nodes() {
    let (src, dst) = generate_graph_edges(50_000);
    // NOTE(review): `Some(42)` is presumably a fixed RNG seed for
    // reproducibility — confirm against `compute_louvain`'s signature.
    // `expect` keeps the regression message and also prints the underlying
    // error on failure instead of discarding it.
    let lv = compute_louvain(&src, &dst, Some(42))
        .expect("Louvain should succeed on 50k nodes (GitHub #3 regression test)");
    assert_eq!(lv.node_ids.len(), 50_000, "Should return all 50k nodes");
}
126160
}
Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
# group: [onager]
2+
3+
# Regression test for GitHub issue #3
4+
5+
require onager
6+
7+
statement ok
8+
pragma enable_verification
9+
10+
# Test with multiple threads to catch batch index issues
11+
statement ok
12+
SET threads TO 4
13+
14+
# =============================================================================
15+
# Test 1: Small graph materialization with multiple threads
16+
# =============================================================================
17+
18+
statement ok
19+
CREATE TABLE small_edges AS
20+
SELECT * FROM (VALUES
21+
(1::bigint, 2::bigint), (2, 3), (3, 4), (4, 5),
22+
(5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 1),
23+
(1, 5), (2, 6), (3, 7), (4, 8), (5, 9)
24+
) t(src, dst)
25+
26+
# Materialize connected components results into a table
27+
statement ok
28+
CREATE TABLE cc_results AS
29+
SELECT * FROM onager_cmm_components((SELECT src, dst FROM small_edges))
30+
31+
query I
32+
SELECT count(*) FROM cc_results
33+
----
34+
10
35+
36+
statement ok
37+
DROP TABLE cc_results
38+
39+
# Materialize pagerank results into a table
40+
statement ok
41+
CREATE TABLE pr_results AS
42+
SELECT * FROM onager_ctr_pagerank((SELECT src, dst FROM small_edges))
43+
44+
query I
45+
SELECT count(*) FROM pr_results
46+
----
47+
10
48+
49+
statement ok
50+
DROP TABLE pr_results
51+
52+
statement ok
53+
DROP TABLE small_edges
54+
55+
# =============================================================================
56+
# Test 2: Medium graph (1000 nodes) - tests chunked output with threads
57+
# =============================================================================
58+
59+
# Generate a graph with ~1000 nodes using Erdos-Renyi
60+
statement ok
61+
CREATE TABLE medium_edges AS
62+
SELECT src, dst FROM onager_gen_erdos_renyi(1000, 0.01, seed := 42)
63+
64+
# Materialize community detection results
65+
statement ok
66+
CREATE TABLE louvain_results AS
67+
SELECT * FROM onager_cmm_louvain((SELECT src, dst FROM medium_edges))
68+
69+
query I
70+
SELECT count(*) >= 900 FROM louvain_results
71+
----
72+
1
73+
74+
statement ok
75+
DROP TABLE louvain_results
76+
77+
# Materialize centrality results
78+
statement ok
79+
CREATE TABLE degree_results AS
80+
SELECT * FROM onager_ctr_degree((SELECT src, dst FROM medium_edges))
81+
82+
query I
83+
SELECT count(*) >= 900 FROM degree_results
84+
----
85+
1
86+
87+
statement ok
88+
DROP TABLE degree_results
89+
90+
statement ok
91+
DROP TABLE medium_edges
92+
93+
# =============================================================================
94+
# Test 3: Verify SELECT * works (not just aggregations like count(*))
95+
# =============================================================================
96+
97+
statement ok
98+
CREATE TABLE test_graph AS
99+
SELECT * FROM (VALUES
100+
(1::bigint, 2::bigint), (2, 3), (3, 1)
101+
) t(src, dst)
102+
103+
# This specifically tests materializing all columns, which triggered the bug
104+
statement ok
105+
CREATE TABLE full_results AS
106+
SELECT node_id, component FROM onager_cmm_components((SELECT src, dst FROM test_graph))
107+
108+
query II rowsort
109+
SELECT * FROM full_results
110+
----
111+
1 1
112+
2 1
113+
3 1
114+
115+
statement ok
116+
DROP TABLE full_results
117+
118+
statement ok
119+
DROP TABLE test_graph
120+
121+
# =============================================================================
122+
# Test 4: Test various algorithms with CREATE TABLE AS
123+
# =============================================================================
124+
125+
statement ok
126+
CREATE TABLE algo_test_edges AS
127+
SELECT src, dst FROM onager_gen_barabasi_albert(500, 3, seed := 123)
128+
129+
# Test approximation algorithms
130+
statement ok
131+
CREATE TABLE clique_results AS
132+
SELECT * FROM onager_apx_max_clique((SELECT src, dst FROM algo_test_edges))
133+
134+
query I
135+
SELECT count(*) > 0 FROM clique_results
136+
----
137+
1
138+
139+
statement ok
140+
DROP TABLE clique_results
141+
142+
# Test link prediction
143+
statement ok
144+
CREATE TABLE jaccard_results AS
145+
SELECT * FROM onager_lnk_jaccard((SELECT src, dst FROM algo_test_edges))
146+
147+
query I
148+
SELECT count(*) > 0 FROM jaccard_results
149+
----
150+
1
151+
152+
statement ok
153+
DROP TABLE jaccard_results
154+
155+
# Test traversal
156+
statement ok
157+
CREATE TABLE bfs_results AS
158+
SELECT * FROM onager_trv_bfs((SELECT src, dst FROM algo_test_edges), source := 1)
159+
160+
query I
161+
SELECT count(*) > 0 FROM bfs_results
162+
----
163+
1
164+
165+
statement ok
166+
DROP TABLE bfs_results
167+
168+
# Test metrics (scalar results)
169+
statement ok
170+
CREATE TABLE density_results AS
171+
SELECT * FROM onager_mtr_density((SELECT src, dst FROM algo_test_edges))
172+
173+
query I
174+
SELECT count(*) FROM density_results
175+
----
176+
1
177+
178+
statement ok
179+
DROP TABLE density_results
180+
181+
# Test MST
182+
statement ok
183+
CREATE TABLE mst_edges AS
184+
SELECT src::bigint, dst::bigint, 1.0::double as weight FROM algo_test_edges
185+
186+
statement ok
187+
CREATE TABLE mst_results AS
188+
SELECT * FROM onager_mst_kruskal((SELECT src, dst, weight FROM mst_edges))
189+
190+
query I
191+
SELECT count(*) > 0 FROM mst_results
192+
----
193+
1
194+
195+
statement ok
196+
DROP TABLE mst_results
197+
198+
statement ok
199+
DROP TABLE mst_edges
200+
201+
statement ok
202+
DROP TABLE algo_test_edges
203+
204+
# =============================================================================
205+
# Test 5: Test with single thread (baseline - should always work)
206+
# =============================================================================
207+
208+
statement ok
209+
SET threads TO 1
210+
211+
statement ok
212+
CREATE TABLE single_thread_edges AS
213+
SELECT src, dst FROM onager_gen_erdos_renyi(200, 0.05, seed := 99)
214+
215+
statement ok
216+
CREATE TABLE single_thread_results AS
217+
SELECT * FROM onager_cmm_components((SELECT src, dst FROM single_thread_edges))
218+
219+
query I
220+
SELECT count(*) > 0 FROM single_thread_results
221+
----
222+
1
223+
224+
statement ok
225+
DROP TABLE single_thread_results
226+
227+
statement ok
228+
DROP TABLE single_thread_edges
229+
230+
# Reset to default
231+
statement ok
232+
RESET threads

0 commit comments

Comments
 (0)