Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions include/hashinator/hashinator.h
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,13 @@ class Hashmap {
buckets, elements, rule, nBlocks, mPool, s);
return retval;
}
template <typename Rule, typename ALLOCATOR>
void extractPatternLoop(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>, ALLOCATOR>& elements, Rule rule,
split_gpuStream_t s = 0) {
// Extract elements matching the Pattern Rule(element)==true;
split::tools::copy_if_loop<hash_pair<KEY_TYPE, VAL_TYPE>, Rule, ALLOCATOR, defaults::MAX_BLOCKSIZE, defaults::WARPSIZE>(
*device_buckets, elements, rule, s);
}
template <typename Rule>
void extractPatternLoop(split::SplitVector<hash_pair<KEY_TYPE, VAL_TYPE>>& elements, Rule rule,
split_gpuStream_t s = 0) {
Expand Down Expand Up @@ -1226,6 +1233,12 @@ class Hashmap {
defaults::WARPSIZE>(buckets, elements, rule, stack, max_size, s);
return elements.size();
}
template <typename Rule, typename ALLOCATOR>
void extractKeysByPatternLoop(split::SplitVector<KEY_TYPE, ALLOCATOR>& elements, Rule rule, split_gpuStream_t s = 0) {
// Extract element **keys** matching the Pattern Rule(element)==true;
split::tools::copy_if_keys_loop<hash_pair<KEY_TYPE, VAL_TYPE>, KEY_TYPE, Rule, ALLOCATOR, defaults::MAX_BLOCKSIZE,
defaults::WARPSIZE>(*device_buckets, elements, rule, s);
}
template <typename Rule>
void extractKeysByPatternLoop(split::SplitVector<KEY_TYPE>& elements, Rule rule, split_gpuStream_t s = 0) {
// Extract element **keys** matching the Pattern Rule(element)==true;
Expand All @@ -1250,6 +1263,14 @@ class Hashmap {
};
return extractKeysByPattern<prefetches>(elements, rule, stack, max_size, s);
}
template <typename ALLOCATOR>
void extractAllKeysLoop(split::SplitVector<KEY_TYPE, ALLOCATOR>& elements, split_gpuStream_t s = 0) {
// Extract all keys
auto rule = [] __host__ __device__(const hash_pair<KEY_TYPE, VAL_TYPE>& kval) -> bool {
return kval.first != EMPTYBUCKET && kval.first != TOMBSTONE;
};
extractKeysByPatternLoop(elements, rule, s);
}
void extractAllKeysLoop(split::SplitVector<KEY_TYPE>& elements, split_gpuStream_t s = 0) {
// Extract all keys
auto rule = [] __host__ __device__(const hash_pair<KEY_TYPE, VAL_TYPE>& kval) -> bool {
Expand Down
39 changes: 35 additions & 4 deletions include/splitvector/split_tools.h
Original file line number Diff line number Diff line change
Expand Up @@ -715,9 +715,9 @@ __global__ void block_compact_keys(T* input, U* output, size_t inputSize, Rule r
}
}

template <typename T, typename Rule, size_t BLOCKSIZE = 1024>
template <typename T, typename Rule, typename ALLOCATOR, size_t BLOCKSIZE = 1024>
__global__ void loop_compact(split::SplitVector<T, split::split_unified_allocator<T>>& inputVec,
split::SplitVector<T, split::split_unified_allocator<T>>& outputVec, Rule rule) {
split::SplitVector<T, ALLOCATOR>& outputVec, Rule rule) {
// This must be equal to at least both WARPLENGTH and MAX_BLOCKSIZE/WARPLENGTH
__shared__ uint32_t warpSums[WARPLENGTH];
__shared__ uint32_t outputCount;
Expand Down Expand Up @@ -797,9 +797,9 @@ __global__ void loop_compact(split::SplitVector<T, split::split_unified_allocato
outputVec.device_resize(outputSize);
}
}
template <typename T, typename U, typename Rule, size_t BLOCKSIZE = 1024>
template <typename T, typename U, typename Rule, typename ALLOCATOR, size_t BLOCKSIZE = 1024>
__global__ void loop_compact_keys(split::SplitVector<T, split::split_unified_allocator<T>>& inputVec,
split::SplitVector<U, split::split_unified_allocator<U>>& outputVec, Rule rule) {
split::SplitVector<U, ALLOCATOR>& outputVec, Rule rule) {
// This must be equal to at least both WARPLENGTH and MAX_BLOCKSIZE/WARPLENGTH
__shared__ uint32_t warpSums[WARPLENGTH];
__shared__ uint32_t outputCount;
Expand Down Expand Up @@ -988,6 +988,24 @@ uint32_t copy_if_raw(T* input, T* output, size_t size, Rule rule, size_t nBlocks
return numel;
}

/**
* @brief Extraction routines using just a single block.
These methods assume splitvectors are fully allocated on UM or Device.
*/

template <typename T, typename Rule, typename ALLOCATOR, size_t BLOCKSIZE = 1024, size_t WARP = WARPLENGTH>
void copy_if_loop(split::SplitVector<T, split::split_unified_allocator<T>>& input,
split::SplitVector<T, ALLOCATOR>& output, Rule rule,
split_gpuStream_t s = 0) {
#ifdef HASHINATOR_DEBUG
bool input_ok = isDeviceAccessible(reinterpret_cast<void*>(&input));
bool output_ok = isDeviceAccessible(reinterpret_cast<void*>(&output));
assert((input_ok && output_ok) &&
"This method supports splitvectors dynamically allocated on device or unified memory!");
#endif
split::tools::loop_compact<<<1, BLOCKSIZE, 0, s>>>(input, output, rule);
}

/**
* @brief Extraction routines using just a single block.
These methods assume splitvectors are fully allocated on UM or Device.
Expand All @@ -1006,6 +1024,19 @@ void copy_if_loop(split::SplitVector<T, split::split_unified_allocator<T>>& inpu
split::tools::loop_compact<<<1, BLOCKSIZE, 0, s>>>(input, output, rule);
}

template <typename T, typename U, typename Rule, typename ALLOCATOR, size_t BLOCKSIZE = 1024, size_t WARP = WARPLENGTH>
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same question for all these pretty much

Copy link
Author

@MikaelHuppunen MikaelHuppunen Feb 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The functions would then break if ALLOCATOR is defined but BLOCKSIZE and WARP are not (or the other way around depending on the order of the arguments).

void copy_if_keys_loop(split::SplitVector<T, split::split_unified_allocator<T>>& input,
split::SplitVector<U, ALLOCATOR>& output, Rule rule,
split_gpuStream_t s = 0) {
#ifdef HASHINATOR_DEBUG
bool input_ok = isDeviceAccessible(reinterpret_cast<void*>(&input));
bool output_ok = isDeviceAccessible(reinterpret_cast<void*>(&output));
assert((input_ok && output_ok) &&
"This method supports splitvectors dynamically allocated on device or unified memory!");
#endif
split::tools::loop_compact_keys<<<1, BLOCKSIZE, 0, s>>>(input, output, rule);
}

template <typename T, typename U, typename Rule, size_t BLOCKSIZE = 1024, size_t WARP = WARPLENGTH>
void copy_if_keys_loop(split::SplitVector<T, split::split_unified_allocator<T>>& input,
split::SplitVector<U, split::split_unified_allocator<U>>& output, Rule rule,
Expand Down