Skip to content

Commit f2e40d4

Browse files
authored
Merge pull request #2 from euphoricpoptarts/main
Jet Partitioner Improvements
2 parents 66973c4 + 0051a8c commit f2e40d4

12 files changed

+712
-1000
lines changed

CMakeLists.txt

+26-14
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,28 @@
1-
#IMPORTANT: Use the cmake flag -DCMAKE_CXX_COMPILER=/path/to/your/nvcc_wrapper
21
cmake_minimum_required(VERSION 3.18)
32
project(jetpartition CXX)
43
set(CMAKE_CXX_STANDARD 17)
54
set(CMAKE_CXX_STANDARD_REQUIRED True)
5+
if(NOT CMAKE_BUILD_TYPE)
6+
set(CMAKE_BUILD_TYPE Release)
7+
endif()
8+
# GKlib is a dependency of Metis when built from its github repo
9+
# Unfortunately there is no simple way to avoid also linking GKlib in this case
10+
# Older distributions of metis do not create this dependency
11+
SET(LINK_GKLIB False CACHE BOOL "Newer Metis distributions require us to link GKlib")
612

7-
#ALL DEVICE BUILDS
813
find_package(KokkosKernels REQUIRED)
9-
add_compile_options(-O3 -Wall -Wextra -Wshadow)
10-
#add_compile_options(-g -DDEBUG)
14+
add_compile_options(-Wall -Wextra -Wshadow)
15+
16+
# This is used by the build script
17+
# to avoid putting metis and gklib in the global path
18+
include_directories(${METIS_DIR}/include)
19+
link_directories(${METIS_DIR}/lib)
1120

1221
add_executable(jet partition.cpp)
1322
add_executable(jet4 partition.cpp)
1423
add_executable(jet2 partition.cpp)
1524
add_executable(jet_host partition.cpp)
16-
add_executable(jet_import partition.cpp)
25+
add_executable(jet_import import_coarse.cpp)
1726
add_executable(jet_export partition.cpp)
1827
add_executable(jet_serial partition.cpp)
1928
add_executable(pstat part_eval.cpp)
@@ -22,14 +31,17 @@ target_compile_definitions(jet PUBLIC HASHMAP_P)
2231
target_compile_definitions(jet4 PUBLIC HASHMAP_P FOUR9)
2332
target_compile_definitions(jet2 PUBLIC HASHMAP_P TWO9)
2433
target_compile_definitions(jet_host PUBLIC HASHMAP_P HOST)
25-
target_compile_definitions(jet_import PUBLIC HASHMAP_P HOST IMP)
34+
target_compile_definitions(jet_import PUBLIC HASHMAP_P HOST)
2635
target_compile_definitions(jet_export PUBLIC HASHMAP_P HOST EXP)
2736
target_compile_definitions(jet_serial PUBLIC HASHMAP_P SERIAL)
28-
target_link_libraries(jet Kokkos::kokkos Kokkos::kokkoskernels metis)
29-
target_link_libraries(jet4 Kokkos::kokkos Kokkos::kokkoskernels metis)
30-
target_link_libraries(jet2 Kokkos::kokkos Kokkos::kokkoskernels metis)
31-
target_link_libraries(jet_host Kokkos::kokkos Kokkos::kokkoskernels metis)
32-
target_link_libraries(jet_import Kokkos::kokkos Kokkos::kokkoskernels)
33-
target_link_libraries(jet_export Kokkos::kokkos Kokkos::kokkoskernels metis)
34-
target_link_libraries(jet_serial Kokkos::kokkos Kokkos::kokkoskernels metis)
35-
target_link_libraries(pstat Kokkos::kokkos Kokkos::kokkoskernels)
37+
foreach(prog jet jet4 jet2 jet_host jet_import jet_export jet_serial pstat)
38+
target_link_libraries(${prog} Kokkos::kokkos Kokkos::kokkoskernels)
39+
endforeach(prog)
40+
foreach(prog jet jet4 jet2 jet_host jet_export jet_serial)
41+
target_link_libraries(${prog} metis)
42+
endforeach(prog)
43+
if(LINK_GKLIB)
44+
foreach(prog jet jet4 jet2 jet_host jet_export jet_serial)
45+
target_link_libraries(${prog} GKlib)
46+
endforeach(prog)
47+
endif()

README.md

+6-1
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,15 @@ For details about the algorithm, please see https://arxiv.org/abs/2304.13194
88

99
Kokkos (https://github.com/kokkos/kokkos): Enables performance portable parallelism.
1010
KokkosKernels (https://github.com/kokkos/kokkos-kernels): Necessary only for KokkosSparse::CrsMatrix class.
11-
Metis (https://github.com/KarypisLab/METIS): Used for initial partitioning of coarsest graph.
11+
Metis (https://github.com/KarypisLab/METIS): Used for initial partitioning of coarsest graph.
12+
(Conditional) GKlib (https://github.com/KarypisLab/GKlib.git): Needed only when linking against the GitHub distribution of Metis. Not needed for older distributions of Metis.
1213

1314
## Usage
1415

16+
### Building
17+
18+
Jet uses the standard CMake build process. If your Metis build requires GKlib, add `-DLINK_GKLIB=True` to your cmake command when building Jet. Example build scripts are provided for macOS with OpenMP and for Linux systems with CUDA. These scripts handle all required dependencies.
19+
1520
### Executables
1621

1722
#### Partitioners

contract.hpp

+23-124
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,6 @@
4848

4949
namespace jet_partitioner {
5050

51-
template<typename ordinal_t>
52-
KOKKOS_INLINE_FUNCTION ordinal_t xorshiftHash(ordinal_t key) {
53-
ordinal_t x = key;
54-
x ^= x << 13;
55-
x ^= x >> 17;
56-
x ^= x << 5;
57-
return x;
58-
}
59-
6051
template<class crsMat> //typename ordinal_t, typename edge_offset_t, typename scalar_t, class Device>
6152
class contracter {
6253
public:
@@ -99,7 +90,7 @@ class contracter {
9990
wgt_view_t vtx_w;
10091
coarse_map interp_mtx;
10192
int level;
102-
bool uniform_weights;
93+
bool uniform_weights = false;
10394
};
10495

10596
// define behavior-controlling enums
@@ -112,6 +103,16 @@ class contracter {
112103
ordinal_t coarse_vtx_cutoff = 1000;
113104
ordinal_t min_allowed_vtx = 250;
114105
unsigned int max_levels = 200;
106+
const ordinal_t large_row_threshold = 1000;
107+
108+
bool has_large_row(const matrix_t g){
109+
ordinal_t max_row = 0;
110+
Kokkos::parallel_reduce("find max row", policy_t(0, g.numRows()), KOKKOS_LAMBDA(const ordinal_t i, ordinal_t& update){
111+
ordinal_t degree = g.graph.row_map(i+1) - g.graph.row_map(i);
112+
if(degree > update) update = degree;
113+
}, Kokkos::Max<ordinal_t, Kokkos::HostSpace>(max_row));
114+
return (max_row >= large_row_threshold);
115+
}
115116

116117
bool should_use_dyn(const ordinal_t n, const Kokkos::View<const edge_offset_t*, Device> work, int t_count){
117118
bool use_dyn = false;
@@ -194,15 +195,15 @@ struct combineAndDedupe {
194195
edge_offset_t insert(const edge_offset_t& hash_start, const edge_offset_t& size, const ordinal_t& u) const {
195196
edge_offset_t offset = abs(xorshiftHash<ordinal_t>(u)) % size;
196197
while(true){
197-
if(htable(hash_start + offset) == -1){
198-
Kokkos::atomic_compare_exchange(&htable(hash_start + offset), -1, u);
198+
ordinal_t v = htable(hash_start + offset);
199+
if(v == -1){
200+
v = Kokkos::atomic_compare_exchange(&htable(hash_start + offset), -1, u);
199201
}
200-
if(htable(hash_start + offset) == u){
202+
if(v == u || v == -1){
201203
return offset;
202-
} else {
203-
offset++;
204-
if(offset >= size) offset -= size;
205204
}
205+
offset++;
206+
if(offset >= size) offset -= size;
206207
}
207208
}
208209

@@ -372,11 +373,13 @@ coarse_level_triple build_coarse_graph(const coarse_level_triple level,
372373
vtx_view_t htable(Kokkos::ViewAllocateWithoutInitializing("hashtable keys"), hash_size);
373374
Kokkos::deep_copy(htable, -1);
374375
wgt_view_t hvals("hashtable values", hash_size);
376+
// use thread teams on gpu when graph has decent average degree or very large max degree
377+
bool use_team = (!is_host_space && (hash_size / n >= 12 || has_large_row(g)));
375378
//insert each coarse vertex into a bucket determined by a hash
376379
//use linear probing to resolve conflicts
377380
//combine weights using atomic addition
378381
combineAndDedupe cnd(g, vcmap.map, htable, hvals, hrow_map);
379-
if(!is_host_space && hash_size / n >= 12) {
382+
if(use_team) {
380383
Kokkos::parallel_for("deduplicate", team_policy_t(n, Kokkos::AUTO), cnd);
381384
} else {
382385
bool use_dyn = should_use_dyn(n, g.graph.row_map, exec_space().concurrency());
@@ -391,7 +394,7 @@ coarse_level_triple build_coarse_graph(const coarse_level_triple level,
391394
timer.reset();
392395
edge_view_t coarse_row_map_f("edges_per_source", nc + 1);
393396
countUnique cu(htable, hrow_map, coarse_row_map_f);
394-
if(!is_host_space && hash_size / nc >= 12) {
397+
if(use_team) {
395398
Kokkos::parallel_for("count unique", team_policy_t(nc, Kokkos::AUTO), cu);
396399
} else {
397400
Kokkos::parallel_for("count unique", policy_t(0, nc), cu);
@@ -412,7 +415,7 @@ coarse_level_triple build_coarse_graph(const coarse_level_triple level,
412415
vtx_view_t entries_coarse(Kokkos::ViewAllocateWithoutInitializing("coarse entries"), hash_size);
413416
wgt_view_t wgts_coarse(Kokkos::ViewAllocateWithoutInitializing("coarse weights"), hash_size);
414417
consolidateUnique consolidate(htable, entries_coarse, hvals, wgts_coarse, hrow_map, coarse_row_map_f);
415-
if(!is_host_space && hash_size / nc >= 12) {
418+
if(use_team) {
416419
Kokkos::parallel_for("consolidate", team_policy_t(nc, Kokkos::AUTO).set_scratch_size(0, Kokkos::PerTeam(4*sizeof(ordinal_t))), consolidate);
417420
} else {
418421
bool use_dyn = should_use_dyn(nc, hrow_map, exec_space().concurrency());
@@ -474,118 +477,14 @@ coarse_map generate_coarse_mapping(const matrix_t g,
474477
break;
475478
case Match:
476479
case MtMetis:
477-
interpolation_graph = mapper.coarsen_match(g, uniform_weights, rand_pool, experiment, choice);
480+
interpolation_graph = mapper.coarsen_match(g, uniform_weights, rand_pool, choice);
478481
break;
479482
}
480483
Kokkos::fence();
481484
experiment.addMeasurement(Measurement::Map, timer.seconds());
482485
return interpolation_graph;
483486
}
484487

485-
void dump_stats(const matrix_t g, const wgt_view_t vtx_w_dev){
486-
typename edge_view_t::HostMirror row_map("row map mirror", g.numRows() + 1);
487-
typename wgt_view_t::HostMirror values("entries mirror", g.nnz());
488-
typename wgt_view_t::HostMirror vtx_w("vtx_w mirror", g.numRows());
489-
Kokkos::deep_copy(row_map, g.graph.row_map);
490-
Kokkos::deep_copy(values, g.values);
491-
Kokkos::deep_copy(vtx_w, vtx_w_dev);
492-
std::ofstream degree_out("dump/degree.txt");
493-
std::ofstream vtx_w_out("dump/vtx_w.txt");
494-
std::ofstream wgt_degree_out("dump/wgt_degree.txt");
495-
for(ordinal_t i = 0; i < g.numRows(); i++){
496-
degree_out << row_map(i+1) - row_map(i) << std::endl;
497-
vtx_w_out << vtx_w(i) << std::endl;
498-
edge_offset_t wgt_degree = 0;
499-
for(edge_offset_t j = row_map(i); j < row_map(i+1); j++){
500-
wgt_degree += values(j);
501-
}
502-
wgt_degree_out << wgt_degree << std::endl;
503-
}
504-
degree_out.close();
505-
wgt_degree_out.close();
506-
vtx_w_out.close();
507-
}
508-
509-
std::list<coarse_level_triple> load_coarse(){
510-
FILE* cgfp = fopen("/home/mike/workspace/mt-KaHIP/coarse_graphs.out", "r");
511-
int size = 0;
512-
fread(&size, sizeof(int), 1, cgfp);
513-
printf("Number of graphs: %i\n", size);
514-
std::list<coarse_level_triple> levels;
515-
ordinal_t prev_n = 0;
516-
for(int i = 0; i < size; i++){
517-
coarse_level_triple level;
518-
level.level = i + 1;
519-
ordinal_t N = 0;
520-
fread(&N, sizeof(ordinal_t), 1, cgfp);
521-
edge_offset_t M = 0;
522-
fread(&M, sizeof(edge_offset_t), 1, cgfp);
523-
edge_view_t rows("rows", N + 1);
524-
auto rows_m = Kokkos::create_mirror_view(rows);
525-
fread(rows_m.data(), sizeof(edge_offset_t), N + 1, cgfp);
526-
Kokkos::deep_copy(rows, rows_m);
527-
vtx_view_t entries("entries", M);
528-
auto entries_m = Kokkos::create_mirror_view(entries);
529-
fread(entries_m.data(), sizeof(ordinal_t), M, cgfp);
530-
Kokkos::deep_copy(entries, entries_m);
531-
wgt_view_t values("values", M);
532-
auto values_m = Kokkos::create_mirror_view(values);
533-
fread(values_m.data(), sizeof(scalar_t), M, cgfp);
534-
Kokkos::deep_copy(values, values_m);
535-
graph_type graph(entries, rows);
536-
matrix_t g("g", N, values, graph);
537-
level.mtx = g;
538-
wgt_view_t vtx_wgts("vtx wgts", N);
539-
auto vtx_wgts_m = Kokkos::create_mirror_view(vtx_wgts);
540-
fread(vtx_wgts_m.data(), sizeof(scalar_t), N, cgfp);
541-
Kokkos::deep_copy(vtx_wgts, vtx_wgts_m);
542-
level.vtx_w = vtx_wgts;
543-
if(level.level > 1){
544-
vtx_view_t i_entries("entries", prev_n);
545-
auto i_entries_m = Kokkos::create_mirror_view(i_entries);
546-
fread(i_entries_m.data(), sizeof(ordinal_t), prev_n, cgfp);
547-
Kokkos::deep_copy(i_entries, i_entries_m);
548-
coarse_map i_g;
549-
i_g.coarse_vtx = N;
550-
i_g.map = i_entries;
551-
level.interp_mtx = i_g;
552-
}
553-
prev_n = N;
554-
levels.push_back(level);
555-
}
556-
fclose(cgfp);
557-
return levels;
558-
}
559-
560-
void dump_coarse(std::list<coarse_level_triple> levels){
561-
FILE* cgfp = fopen("/home/mike/workspace/mt-KaHIP/coarse_graphs.out", "w");
562-
int size = levels.size();
563-
fwrite(&size, sizeof(int), 1, cgfp);
564-
ordinal_t prev_n = 0;
565-
for(auto level : levels){
566-
matrix_t g = level.mtx;
567-
ordinal_t N = g.numRows();
568-
edge_offset_t M = g.nnz();
569-
auto rows = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), g.graph.row_map);
570-
auto entries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), g.graph.entries);
571-
auto values = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), g.values);
572-
auto vtx_wgts = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), level.vtx_w);
573-
fwrite(&N, sizeof(ordinal_t), 1, cgfp);
574-
fwrite(&M, sizeof(edge_offset_t), 1, cgfp);
575-
fwrite(rows.data(), sizeof(edge_offset_t), N+1, cgfp);
576-
fwrite(entries.data(), sizeof(ordinal_t), M, cgfp);
577-
fwrite(values.data(), sizeof(scalar_t), M, cgfp);
578-
fwrite(vtx_wgts.data(), sizeof(scalar_t), N, cgfp);
579-
if(level.level > 1){
580-
coarse_map interp_mtx = level.interp_mtx;
581-
auto i_entries = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), interp_mtx.map);
582-
fwrite(i_entries.data(), sizeof(ordinal_t), prev_n, cgfp);
583-
}
584-
prev_n = N;
585-
}
586-
fclose(cgfp);
587-
}
588-
589488
std::list<coarse_level_triple> generate_coarse_graphs(const matrix_t fine_g, const wgt_view_t vweights, ExperimentLoggerUtil<scalar_t>& experiment, bool uniform_eweights = false) {
590489

591490
Kokkos::Timer timer;

0 commit comments

Comments
 (0)