-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil.hpp
114 lines (90 loc) · 4.17 KB
/
util.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#pragma once
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <functional>
#include <iostream>
#include <limits>
#include <vector>
// Writes one benchmark summary line to std::cout:
//   "<throughput> iter/s @ avg: <value> <unit>"
// The average is given in microseconds and is shown in the largest unit that
// keeps it at or above 1: seconds, then milliseconds, otherwise microseconds.
//
// throughput           - iterations per second, printed as-is.
// avgDurationMicroSec  - mean duration of one iteration, in microseconds.
inline void print_human_readble_timeusage(double throughput, long long avgDurationMicroSec) {
    constexpr long long kMicrosPerMilli = 1000;
    constexpr long long kMicrosPerSecond = 1000000;

    // The prefix is shared by every unit, so emit it once up front.
    std::cout << throughput << " iter/s @ avg: ";

    if (avgDurationMicroSec >= kMicrosPerSecond) {
        std::cout << avgDurationMicroSec / 1000000.0 << " seconds";
    } else if (avgDurationMicroSec >= kMicrosPerMilli) {
        std::cout << avgDurationMicroSec / 1000.0 << " milliseconds";
    } else {
        // Sub-millisecond values stay integral; no scaling is applied.
        std::cout << avgDurationMicroSec << " microseconds";
    }

    std::cout << std::endl;
}
// Calls `func` exactly `numIterations` times back to back, then prints the
// measured throughput and mean time per call via print_human_readble_timeusage.
//
// func           - callable to benchmark.
// numIterations  - number of calls to time. Must be positive; for zero or a
//                  negative value nothing is run and a diagnostic is written
//                  to std::cerr instead.
inline void benchmark_func(const std::function<void()> &func, int numIterations) {
    if (numIterations <= 0) {
        // The average below is an integer division by numIterations; a zero
        // count would be undefined behaviour, so reject it up front.
        std::cerr << "benchmark_func: numIterations must be positive, got " << numIterations << std::endl;
        return;
    }

    // steady_clock is monotonic. high_resolution_clock may alias system_clock,
    // which can be adjusted while the measurement is running.
    const auto start = std::chrono::steady_clock::now();
    for (int i = 0; i < numIterations; ++i) {
        func();
    }
    const auto end = std::chrono::steady_clock::now();

    const auto totalDuration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    // Iterations per second; the microsecond total is converted to seconds first.
    const double throughput = numIterations / (totalDuration.count() / 1000000.0);
    // Mean time per call in whole microseconds (integer division truncates).
    const long long avgDuration = totalDuration.count() / numIterations;
    print_human_readble_timeusage(throughput, avgDuration);
}
// Calls `func` repeatedly until at least `duration` has elapsed, then prints
// the measured throughput and mean time per call via
// print_human_readble_timeusage.
//
// func      - callable to benchmark.
// duration  - time budget. The deadline is only checked between calls, so the
//             run may overshoot by up to one call of `func`. If no call
//             completes (e.g. a zero or negative duration) a diagnostic is
//             written to std::cerr and no statistics are printed.
inline void benchmark_func(const std::function<void()> &func, std::chrono::seconds duration) {
    // steady_clock is monotonic. high_resolution_clock may alias system_clock,
    // which can be adjusted while the measurement is running.
    const auto start = std::chrono::steady_clock::now();
    const auto deadline = start + duration;

    // 64-bit counter: a cheap callable over a long budget can exceed INT_MAX.
    long long numIterations = 0;
    while (std::chrono::steady_clock::now() < deadline) {
        func();
        ++numIterations;
    }
    const auto actualEnd = std::chrono::steady_clock::now();

    if (numIterations == 0) {
        // The average below divides by the iteration count; bail out rather
        // than perform an integer division by zero.
        std::cerr << "benchmark_func: no iterations completed within " << duration.count() << " s" << std::endl;
        return;
    }

    const auto totalDuration = std::chrono::duration_cast<std::chrono::microseconds>(actualEnd - start);
    // Iterations per second; the microsecond total is converted to seconds first.
    const double throughput = numIterations / (totalDuration.count() / 1000000.0);
    // Mean time per call in whole microseconds (integer division truncates).
    const long long avgDuration = totalDuration.count() / numIterations;
    print_human_readble_timeusage(throughput, avgDuration);
}
// Convenience overload: benchmarks `func` with a fixed time budget of ten
// seconds by forwarding to the duration-based overload.
inline void benchmark_func(const std::function<void()> &func) {
    constexpr std::chrono::seconds kDefaultBudget{10};
    benchmark_func(func, kDefaultBudget);
}
// Callable that takes a sycl::queue by reference and returns nothing. The
// benchmark_sycl_kernel helpers invoke it once per iteration (presumably it
// submits one kernel to the queue - confirm against callers).
using sycl_kernel = std::function<void(sycl::queue &)>;
// Invokes `submitKernel(queue)` `numIterations` times, waits for the queue to
// finish, and returns the elapsed wall-clock time for the whole batch.
//
// submitKernel   - callable invoked once per iteration with `queue`.
// queue          - queue handed to `submitKernel`; queue.wait() is called once
//                  after the last invocation, inside the timed region.
// numIterations  - number of invocations; for values <= 0 the callable is never
//                  invoked and only the wait is timed.
//
// Returns the elapsed time truncated to whole microseconds.
inline std::chrono::microseconds benchmark_sycl_kernel(
    const sycl_kernel &submitKernel, sycl::queue &queue, int numIterations) {
    // steady_clock is monotonic. high_resolution_clock may alias system_clock,
    // which can be adjusted while the measurement is running.
    const auto start = std::chrono::steady_clock::now();
    for (int i = 0; i < numIterations; ++i) {
        submitKernel(queue);
    }
    // The wait is part of the measurement: the clock is read only after it returns.
    queue.wait();
    const auto end = std::chrono::steady_clock::now();
    return std::chrono::duration_cast<std::chrono::microseconds>(end - start);
}
inline void benchmark_sycl_kernel(
const sycl_kernel &submitKernel, sycl::queue &queue, std::chrono::seconds duration) {
auto kb = sycl::get_kernel_bundle<sycl::bundle_state::executable>(queue.get_context());
auto iter1_sec = benchmark_sycl_kernel(submitKernel, queue, 1).count() / 1000000.0;
auto iter100_sec = benchmark_sycl_kernel(submitKernel, queue, 100).count() / 1000000.0;
auto sec_per_iter = (iter100_sec - iter1_sec) / 99.0;
auto numIterations = static_cast<int>(duration.count() / sec_per_iter);
auto totalDuration = benchmark_sycl_kernel(submitKernel, queue, numIterations);
auto throughput = numIterations / (totalDuration.count() / 1000000.0);
auto avgDuration = totalDuration.count() / numIterations;
print_human_readble_timeusage(throughput, avgDuration);
}
// Convenience overload: benchmarks `submitKernel` on `queue` with a fixed
// target duration of ten seconds by forwarding to the duration-based overload.
inline void benchmark_sycl_kernel(const sycl_kernel &submitKernel, sycl::queue &queue) {
    constexpr std::chrono::seconds kDefaultBudget{10};
    benchmark_sycl_kernel(submitKernel, queue, kDefaultBudget);
}
// Element-wise approximate comparison of two float vectors.
//
// v1         - reference values; the allowed error of each element scales with
//              the magnitude of the corresponding v1 element.
// v2         - values to check against v1.
// tolerance  - maximum allowed relative error, |v1[i] - v2[i]| <= tolerance * |v1[i]|.
//
// Returns true when both vectors have the same size and every pair of elements
// is either exactly equal or within the relative tolerance. A NaN in either
// vector makes the comparison fail.
inline bool floatVectorEquals(const std::vector<float> &v1, const std::vector<float> &v2, float tolerance = 1e-5f) {
    if (v1.size() != v2.size()) {
        return false;
    }
    for (std::size_t i = 0; i < v1.size(); ++i) {
        const float expected = v1[i];
        const float actual = v2[i];
        if (expected == actual) {
            // Exact match; also covers 0 == 0 and equal infinities, where the
            // relative test below would produce NaN.
            continue;
        }
        const float diff = std::fabs(expected - actual);
        // Scale the tolerance by the reference magnitude instead of dividing by
        // the signed reference: a negative divisor flips the sign of the ratio
        // and would let any mismatch pass. The negated comparison makes a NaN
        // difference count as a mismatch.
        if (!(diff <= tolerance * std::fabs(expected))) {
            return false;
        }
    }
    return true;
}
// Scores a device: GPUs score their max_compute_units count, all other devices
// score 0, and any device on the Level Zero backend gets one extra point.
// NOTE(review): the signature matches a SYCL device-selector callable, where
// the highest score wins - confirm that is how callers use it.
inline int gpu_selector_by_cu(const sycl::device &dev) {
    // Only GPUs contribute their compute-unit count.
    const int computeUnitScore =
        dev.is_gpu() ? static_cast<int>(dev.get_info<sycl::info::device::max_compute_units>()) : 0;
    // One-point bonus for the Level Zero backend, applied to any device type.
    const int backendBonus = (dev.get_backend() == sycl::backend::ext_oneapi_level_zero) ? 1 : 0;
    return computeUnitScore + backendBonus;
}