48
48
49
49
namespace jet_partitioner {
50
50
51
- template <typename ordinal_t >
52
- KOKKOS_INLINE_FUNCTION ordinal_t xorshiftHash (ordinal_t key) {
53
- ordinal_t x = key;
54
- x ^= x << 13 ;
55
- x ^= x >> 17 ;
56
- x ^= x << 5 ;
57
- return x;
58
- }
59
-
60
51
template <class crsMat > // typename ordinal_t, typename edge_offset_t, typename scalar_t, class Device>
61
52
class contracter {
62
53
public:
@@ -99,7 +90,7 @@ class contracter {
99
90
wgt_view_t vtx_w;
100
91
coarse_map interp_mtx;
101
92
int level;
102
- bool uniform_weights;
93
+ bool uniform_weights = false ;
103
94
};
104
95
105
96
// define behavior-controlling enums
@@ -112,6 +103,16 @@ class contracter {
112
103
ordinal_t coarse_vtx_cutoff = 1000 ;
113
104
ordinal_t min_allowed_vtx = 250 ;
114
105
unsigned int max_levels = 200 ;
106
+ const ordinal_t large_row_threshold = 1000 ;
107
+
108
+ bool has_large_row (const matrix_t g){
109
+ ordinal_t max_row = 0 ;
110
+ Kokkos::parallel_reduce (" find max row" , policy_t (0 , g.numRows ()), KOKKOS_LAMBDA (const ordinal_t i, ordinal_t & update){
111
+ ordinal_t degree = g.graph .row_map (i+1 ) - g.graph .row_map (i);
112
+ if (degree > update) update = degree;
113
+ }, Kokkos::Max<ordinal_t , Kokkos::HostSpace>(max_row));
114
+ return (max_row >= large_row_threshold);
115
+ }
115
116
116
117
bool should_use_dyn (const ordinal_t n, const Kokkos::View<const edge_offset_t *, Device> work, int t_count){
117
118
bool use_dyn = false ;
@@ -194,15 +195,15 @@ struct combineAndDedupe {
194
195
// Finds or claims the slot for key `u` inside the slice of `htable` that starts
// at `hash_start` and spans `size` slots, using linear probing with wrap-around.
//
// A slot holding -1 is empty. An empty slot is claimed with an atomic
// compare-exchange, so concurrent callers inserting into the same slice agree
// on a single slot per key.
//
// Returns the offset of the slot relative to `hash_start`.
//
// The probe loop only ends when it meets an empty slot or a slot already
// holding `u`; the caller must size the slice so that one of those exists.
edge_offset_t insert(const edge_offset_t& hash_start, const edge_offset_t& size, const ordinal_t& u) const {
    const ordinal_t hashed = xorshiftHash<ordinal_t>(u);
    edge_offset_t offset;
    if(hashed >= 0){
        offset = hashed % size;
    } else {
        // |hashed| % size computed without negating `hashed` directly:
        // abs() of the most negative value overflows and stays negative, which
        // would give a negative starting offset. -(hashed + 1) is always
        // representable, and adding the 1 back after the first modulo yields
        // the same slot abs() produced for every other negative hash.
        const ordinal_t magnitude_minus_one = -(hashed + 1);
        offset = (magnitude_minus_one % size + 1) % size;
    }
    while(true){
        ordinal_t v = htable(hash_start + offset);
        if(v == -1){
            // returns the value seen in the slot: -1 means this call claimed it
            v = Kokkos::atomic_compare_exchange(&htable(hash_start + offset), -1, u);
        }
        // either the slot was claimed just now, or it already holds this key
        if(v == u || v == -1){
            return offset;
        }
        // slot belongs to a different key: step to the next one, wrapping at the end
        offset++;
        if(offset >= size) offset -= size;
    }
}
208
209
@@ -372,11 +373,13 @@ coarse_level_triple build_coarse_graph(const coarse_level_triple level,
372
373
vtx_view_t htable (Kokkos::ViewAllocateWithoutInitializing (" hashtable keys" ), hash_size);
373
374
Kokkos::deep_copy (htable, -1 );
374
375
wgt_view_t hvals (" hashtable values" , hash_size);
376
+ // use thread teams on gpu when graph has decent average degree or very large max degree
377
+ bool use_team = (!is_host_space && (hash_size / n >= 12 || has_large_row (g)));
375
378
// insert each coarse vertex into a bucket determined by a hash
376
379
// use linear probing to resolve conflicts
377
380
// combine weights using atomic addition
378
381
combineAndDedupe cnd (g, vcmap.map , htable, hvals, hrow_map);
379
- if (!is_host_space && hash_size / n >= 12 ) {
382
+ if (use_team ) {
380
383
Kokkos::parallel_for (" deduplicate" , team_policy_t (n, Kokkos::AUTO), cnd);
381
384
} else {
382
385
bool use_dyn = should_use_dyn (n, g.graph .row_map , exec_space ().concurrency ());
@@ -391,7 +394,7 @@ coarse_level_triple build_coarse_graph(const coarse_level_triple level,
391
394
timer.reset ();
392
395
edge_view_t coarse_row_map_f (" edges_per_source" , nc + 1 );
393
396
countUnique cu (htable, hrow_map, coarse_row_map_f);
394
- if (!is_host_space && hash_size / nc >= 12 ) {
397
+ if (use_team ) {
395
398
Kokkos::parallel_for (" count unique" , team_policy_t (nc, Kokkos::AUTO), cu);
396
399
} else {
397
400
Kokkos::parallel_for (" count unique" , policy_t (0 , nc), cu);
@@ -412,7 +415,7 @@ coarse_level_triple build_coarse_graph(const coarse_level_triple level,
412
415
vtx_view_t entries_coarse (Kokkos::ViewAllocateWithoutInitializing (" coarse entries" ), hash_size);
413
416
wgt_view_t wgts_coarse (Kokkos::ViewAllocateWithoutInitializing (" coarse weights" ), hash_size);
414
417
consolidateUnique consolidate (htable, entries_coarse, hvals, wgts_coarse, hrow_map, coarse_row_map_f);
415
- if (!is_host_space && hash_size / nc >= 12 ) {
418
+ if (use_team ) {
416
419
Kokkos::parallel_for (" consolidate" , team_policy_t (nc, Kokkos::AUTO).set_scratch_size (0 , Kokkos::PerTeam (4 *sizeof (ordinal_t ))), consolidate);
417
420
} else {
418
421
bool use_dyn = should_use_dyn (nc, hrow_map, exec_space ().concurrency ());
@@ -474,118 +477,14 @@ coarse_map generate_coarse_mapping(const matrix_t g,
474
477
break ;
475
478
case Match:
476
479
case MtMetis:
477
- interpolation_graph = mapper.coarsen_match (g, uniform_weights, rand_pool, experiment, choice);
480
+ interpolation_graph = mapper.coarsen_match (g, uniform_weights, rand_pool, choice);
478
481
break ;
479
482
}
480
483
Kokkos::fence ();
481
484
experiment.addMeasurement (Measurement::Map, timer.seconds ());
482
485
return interpolation_graph;
483
486
}
484
487
485
- void dump_stats (const matrix_t g, const wgt_view_t vtx_w_dev){
486
- typename edge_view_t ::HostMirror row_map (" row map mirror" , g.numRows () + 1 );
487
- typename wgt_view_t ::HostMirror values (" entries mirror" , g.nnz ());
488
- typename wgt_view_t ::HostMirror vtx_w (" vtx_w mirror" , g.numRows ());
489
- Kokkos::deep_copy (row_map, g.graph .row_map );
490
- Kokkos::deep_copy (values, g.values );
491
- Kokkos::deep_copy (vtx_w, vtx_w_dev);
492
- std::ofstream degree_out (" dump/degree.txt" );
493
- std::ofstream vtx_w_out (" dump/vtx_w.txt" );
494
- std::ofstream wgt_degree_out (" dump/wgt_degree.txt" );
495
- for (ordinal_t i = 0 ; i < g.numRows (); i++){
496
- degree_out << row_map (i+1 ) - row_map (i) << std::endl;
497
- vtx_w_out << vtx_w (i) << std::endl;
498
- edge_offset_t wgt_degree = 0 ;
499
- for (edge_offset_t j = row_map (i); j < row_map (i+1 ); j++){
500
- wgt_degree += values (j);
501
- }
502
- wgt_degree_out << wgt_degree << std::endl;
503
- }
504
- degree_out.close ();
505
- wgt_degree_out.close ();
506
- vtx_w_out.close ();
507
- }
508
-
509
- std::list<coarse_level_triple> load_coarse (){
510
- FILE* cgfp = fopen (" /home/mike/workspace/mt-KaHIP/coarse_graphs.out" , " r" );
511
- int size = 0 ;
512
- fread (&size, sizeof (int ), 1 , cgfp);
513
- printf (" Number of graphs: %i\n " , size);
514
- std::list<coarse_level_triple> levels;
515
- ordinal_t prev_n = 0 ;
516
- for (int i = 0 ; i < size; i++){
517
- coarse_level_triple level;
518
- level.level = i + 1 ;
519
- ordinal_t N = 0 ;
520
- fread (&N, sizeof (ordinal_t ), 1 , cgfp);
521
- edge_offset_t M = 0 ;
522
- fread (&M, sizeof (edge_offset_t ), 1 , cgfp);
523
- edge_view_t rows (" rows" , N + 1 );
524
- auto rows_m = Kokkos::create_mirror_view (rows);
525
- fread (rows_m.data (), sizeof (edge_offset_t ), N + 1 , cgfp);
526
- Kokkos::deep_copy (rows, rows_m);
527
- vtx_view_t entries (" entries" , M);
528
- auto entries_m = Kokkos::create_mirror_view (entries);
529
- fread (entries_m.data (), sizeof (ordinal_t ), M, cgfp);
530
- Kokkos::deep_copy (entries, entries_m);
531
- wgt_view_t values (" values" , M);
532
- auto values_m = Kokkos::create_mirror_view (values);
533
- fread (values_m.data (), sizeof (scalar_t ), M, cgfp);
534
- Kokkos::deep_copy (values, values_m);
535
- graph_type graph (entries, rows);
536
- matrix_t g (" g" , N, values, graph);
537
- level.mtx = g;
538
- wgt_view_t vtx_wgts (" vtx wgts" , N);
539
- auto vtx_wgts_m = Kokkos::create_mirror_view (vtx_wgts);
540
- fread (vtx_wgts_m.data (), sizeof (scalar_t ), N, cgfp);
541
- Kokkos::deep_copy (vtx_wgts, vtx_wgts_m);
542
- level.vtx_w = vtx_wgts;
543
- if (level.level > 1 ){
544
- vtx_view_t i_entries (" entries" , prev_n);
545
- auto i_entries_m = Kokkos::create_mirror_view (i_entries);
546
- fread (i_entries_m.data (), sizeof (ordinal_t ), prev_n, cgfp);
547
- Kokkos::deep_copy (i_entries, i_entries_m);
548
- coarse_map i_g;
549
- i_g.coarse_vtx = N;
550
- i_g.map = i_entries;
551
- level.interp_mtx = i_g;
552
- }
553
- prev_n = N;
554
- levels.push_back (level);
555
- }
556
- fclose (cgfp);
557
- return levels;
558
- }
559
-
560
- void dump_coarse (std::list<coarse_level_triple> levels){
561
- FILE* cgfp = fopen (" /home/mike/workspace/mt-KaHIP/coarse_graphs.out" , " w" );
562
- int size = levels.size ();
563
- fwrite (&size, sizeof (int ), 1 , cgfp);
564
- ordinal_t prev_n = 0 ;
565
- for (auto level : levels){
566
- matrix_t g = level.mtx ;
567
- ordinal_t N = g.numRows ();
568
- edge_offset_t M = g.nnz ();
569
- auto rows = Kokkos::create_mirror_view_and_copy (Kokkos::HostSpace (), g.graph .row_map );
570
- auto entries = Kokkos::create_mirror_view_and_copy (Kokkos::HostSpace (), g.graph .entries );
571
- auto values = Kokkos::create_mirror_view_and_copy (Kokkos::HostSpace (), g.values );
572
- auto vtx_wgts = Kokkos::create_mirror_view_and_copy (Kokkos::HostSpace (), level.vtx_w );
573
- fwrite (&N, sizeof (ordinal_t ), 1 , cgfp);
574
- fwrite (&M, sizeof (edge_offset_t ), 1 , cgfp);
575
- fwrite (rows.data (), sizeof (edge_offset_t ), N+1 , cgfp);
576
- fwrite (entries.data (), sizeof (ordinal_t ), M, cgfp);
577
- fwrite (values.data (), sizeof (scalar_t ), M, cgfp);
578
- fwrite (vtx_wgts.data (), sizeof (scalar_t ), N, cgfp);
579
- if (level.level > 1 ){
580
- coarse_map interp_mtx = level.interp_mtx ;
581
- auto i_entries = Kokkos::create_mirror_view_and_copy (Kokkos::HostSpace (), interp_mtx.map );
582
- fwrite (i_entries.data (), sizeof (ordinal_t ), prev_n, cgfp);
583
- }
584
- prev_n = N;
585
- }
586
- fclose (cgfp);
587
- }
588
-
589
488
std::list<coarse_level_triple> generate_coarse_graphs (const matrix_t fine_g, const wgt_view_t vweights, ExperimentLoggerUtil<scalar_t >& experiment, bool uniform_eweights = false ) {
590
489
591
490
Kokkos::Timer timer;
0 commit comments