Skip to content

Commit

Permalink
Added kernel submission mode for tracers, PTI starts to require Level…
Browse files Browse the repository at this point in the history
… Zero 1.2 runtime, update MDAPI dependencies
  • Loading branch information
anton-v-gorshkov committed Dec 8, 2021
1 parent f9253f2 commit a386025
Show file tree
Hide file tree
Showing 33 changed files with 743 additions and 284 deletions.
4 changes: 2 additions & 2 deletions SOFTWARE
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ metrics-discovery:
- sudo make install
- sudo make package
- sudo dpkg -i *.deb
commit: 7c416548fe13274effb795fcee55d099aee41b3f
commit: 8334347b2e61c339e1118680938470172dd38cb4
url: https://github.com/intel/metrics-discovery
metrics-library:
github:
Expand All @@ -28,7 +28,7 @@ metrics-library:
- make -j$(nproc)
- sudo make package
- sudo dpkg -i *.deb
commit: 0b05eb3138d3f0492c20111599d9a9a5af4a065f
commit: 3fd6eb0544fadcec2ac762aedee7c2d5d6479feb
url: https://github.com/intel/metrics-library
gtpin:
archive:
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.41.1
0.42.0
2 changes: 1 addition & 1 deletion build_utils/get_md_headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import build_utils

url = "https://github.com/intel/metrics-discovery.git"
commit = "7c416548fe13274effb795fcee55d099aee41b3f"
commit = "8334347b2e61c339e1118680938470172dd38cb4"

def main():
if len(sys.argv) < 3:
Expand Down
7 changes: 3 additions & 4 deletions chapters/device_activity_tracing/LevelZero.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,17 +85,16 @@ Host timestamp value corresponds to `CLOCK_MONOTONIC_RAW` on Linux or `QueryPerf
Note that the number of valid bits for the device timestamp returned by `zeDeviceGetGlobalTimestamps` is `timestampValidBits`, while the global kernel timestamp returned by `zeEventQueryKernelTimestamp` has `kernelTimestampValidBits` (both values are fields of `ze_device_properties_t`). Currently `kernelTimestampValidBits` is less than `timestampValidBits`, so to map kernels onto the CPU timeline one may need to truncate the device timestamp to `kernelTimestampValidBits`:
```cpp
ze_device_properties_t props{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, };
ze_device_properties_t props{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2, };
ze_result_t status = zeDeviceGetProperties(device, &props);
assert(status == ZE_RESULT_SUCCESS);
uint64_t mask = (1ull << props.kernelTimestampValidBits) - 1ull;
uint64_t kernel_timestamp = (device_timestamp & mask);
```
To convert GPU cycles into seconds one may use `timerResolution` field from `ze_device_properties_t` structure, that represents cycles per second starting from Level Zero 1.1:
To convert GPU cycles into seconds one may use `timerResolution` field from `ze_device_properties_t` structure, that represents cycles per second starting from Level Zero 1.2:
```cpp
ze_device_properties_t props{};
props.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
ze_device_properties_t props{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2, };
ze_result_t status = zeDeviceGetProperties(device, &props);
assert(status == ZE_RESULT_SUCCESS);

Expand Down
14 changes: 7 additions & 7 deletions chapters/metrics_collection/MetricsDiscoveryAPI.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@ md::OpenAdapterGroup_fn OpenAdapterGroup =
(md::OpenAdapterGroup_fn)dlsym(handle, "OpenAdapterGroup");
assert(OpenAdapterGroup != nullptr);

md::IAdapterGroup_1_9* adapter_group = nullptr;
md::IAdapterGroupLatest* adapter_group = nullptr;
status = OpenAdapterGroup(&adapter_group);
PTI_ASSERT(status == md::CC_OK);

md::IAdapter_1_9* adapter = adapter_group->GetAdapter(0 /* device id*/);
md::IAdapterLatest* adapter = adapter_group->GetAdapter(0 /* device id*/);
PTI_ASSERT(adapter != nullptr);

uint32_t sub_devices_count = adapter->GetParams()->SubDevicesCount;
Expand Down Expand Up @@ -113,20 +113,20 @@ Every group, set and separate metric contain a list of properties that helps to d
In addition to metrics, each set includes so-called *information* items, which are collected along with the metrics and show e.g. the time when a sample was collected, GPU core frequency, report identifier and others.
```cpp
for (uint32_t gid = 0; gid < device->GetParams()->ConcurrentGroupsCount; ++gid) {
md::IConcurrentGroup_1_5* group = device->GetConcurrentGroup(gid);
md::IConcurrentGroupLatest* group = device->GetConcurrentGroup(gid);
assert(group != nullptr);
std::cout << "Metric Group " << gid << ": " <<
group->GetParams()->SymbolName << std::endl;

for (uint32_t sid = 0; sid < group->GetParams()->MetricSetsCount; ++sid) {
md::IMetricSet_1_5* set = group->GetMetricSet(sid);
md::IMetricSetLatest* set = group->GetMetricSet(sid);
assert(set != nullptr);
std::cout << "\tMetric Set " << sid << ": " <<
set->GetParams()->SymbolName << " (" << set->GetParams()->ShortName <<
")" << std::endl;

for (uint32_t mid = 0; mid < set->GetParams()->MetricsCount; ++mid) {
md::IMetric_1_0* metric = set->GetMetric(mid);
md::IMetricLatest* metric = set->GetMetric(mid);
assert(metric != nullptr);
std::cout << "\t\tMetric " << mid << ": " <<
metric->GetParams()->SymbolName << " (" <<
Expand Down Expand Up @@ -160,7 +160,7 @@ md::TCompletionCode status = md::CC_OK;
Next, one should set the type of collection for the target metric set. The MD library can work with multiple APIs, like OpenGL, DirectX, Metal and others, so one needs to specify explicitly which type of API to employ. In addition to an exact API, it's possible to enable the
most general collection using the `API_TYPE_IOSTREAM` flag.
```cpp
md::IMetricSet_1_5* set; // target metric set, see Enumeration section
md::IMetricSetLatest* set; // target metric set, see Enumeration section
status = set->SetApiFiltering(md::API_TYPE_IOSTREAM);
assert(status == md::CC_OK);
```
Expand All @@ -171,7 +171,7 @@ As a result, both `sampling_interval` and `buffer_size` may be updated by the va
Note that the call may fail due to a lack of access rights for the current user (try "root" on Linux) or a sampling interval value that is too small. Refer to
[Metrics Discovery (MD) API](https://github.com/intel/metrics-discovery) project to get more information.
```cpp
md::IConcurrentGroup_1_5* group; // target metric group, see Enumeration section
md::IConcurrentGroupLatest* group; // target metric group, see Enumeration section
status = group->OpenIoStream(set, 0, &sampling_interval, &buffer_size);
assert (status == md::CC_OK && buffer_size > 0);
Expand Down
18 changes: 9 additions & 9 deletions samples/cl_gpu_metrics/cl_metric_collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ class ClMetricCollector {
return nullptr;
}

md::IConcurrentGroup_1_5* group =
md::IConcurrentGroupLatest* group =
metric_device->FindMetricGroup(set_name);
md::IMetricSet_1_5* set = metric_device->FindMetricSet(set_name);
md::IMetricSetLatest* set = metric_device->FindMetricSet(set_name);
if (group == nullptr || set == nullptr) {
std::cerr << "[WARNING] Metric set is not found: " <<
set_name << std::endl;
Expand Down Expand Up @@ -87,7 +87,7 @@ class ClMetricCollector {
PTI_ASSERT(set_ != nullptr);

for (uint32_t mid = 0; mid < set_->GetParams()->MetricsCount; ++mid) {
md::IMetric_1_0* metric = set_->GetMetric(mid);
md::IMetricLatest* metric = set_->GetMetric(mid);
PTI_ASSERT(metric != nullptr);
if (strcmp(metric->GetParams()->SymbolName, name) == 0) {
return mid;
Expand Down Expand Up @@ -155,8 +155,8 @@ class ClMetricCollector {

private: // Implementation Details
ClMetricCollector(
MetricDevice* device, md::IConcurrentGroup_1_5* group,
md::IMetricSet_1_5* set)
MetricDevice* device, md::IConcurrentGroupLatest* group,
md::IMetricSetLatest* set)
: device_(device), group_(group), set_(set) {
PTI_ASSERT(device_ != nullptr);
PTI_ASSERT(group_ != nullptr);
Expand Down Expand Up @@ -202,8 +202,8 @@ class ClMetricCollector {
PTI_ASSERT(collector->group_ != nullptr);
PTI_ASSERT(collector->set_ != nullptr);

md::IConcurrentGroup_1_5* group = collector->group_;
md::IMetricSet_1_5* set = collector->set_;
md::IConcurrentGroupLatest* group = collector->group_;
md::IMetricSetLatest* set = collector->set_;

uint32_t sampling_interval = 100000; // nanoseconds
uint32_t buffer_size = 0; // defined by MDAPI
Expand Down Expand Up @@ -249,8 +249,8 @@ class ClMetricCollector {

private: // Data
MetricDevice* device_ = nullptr;
md::IConcurrentGroup_1_5* group_ = nullptr;
md::IMetricSet_1_5* set_ = nullptr;
md::IConcurrentGroupLatest* group_ = nullptr;
md::IMetricSetLatest* set_ = nullptr;

std::atomic<CollectorState> collector_state_{COLLECTOR_STATE_IDLE};
std::thread* collector_thread_ = nullptr;
Expand Down
14 changes: 7 additions & 7 deletions samples/cl_gpu_query/cl_metric_collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ class ClMetricCollector {
return nullptr;
}

md::IConcurrentGroup_1_5* group =
md::IConcurrentGroupLatest* group =
metric_device->FindMetricGroup(set_name);
md::IMetricSet_1_5* set = metric_device->FindMetricSet(set_name);
md::IMetricSetLatest* set = metric_device->FindMetricSet(set_name);
if (group == nullptr || set == nullptr) {
std::cerr << "[WARNING] Metric set is not found: " <<
set_name << std::endl;
Expand Down Expand Up @@ -204,8 +204,8 @@ class ClMetricCollector {
private: // Implementation Details
ClMetricCollector(
MetricDevice* device,
md::IConcurrentGroup_1_5* group,
md::IMetricSet_1_5* set,
md::IConcurrentGroupLatest* group,
md::IMetricSetLatest* set,
decltype(clCreatePerfCountersCommandQueueINTEL)* ptr)
: device_(device), group_(group), set_(set),
clCreatePerfCountersCommandQueue_(ptr) {
Expand Down Expand Up @@ -312,7 +312,7 @@ class ClMetricCollector {
PTI_ASSERT(set_ != nullptr);

for (uint32_t mid = 0; mid < set_->GetParams()->MetricsCount; ++mid) {
md::IMetric_1_0* metric = set_->GetMetric(mid);
md::IMetricLatest* metric = set_->GetMetric(mid);
PTI_ASSERT(metric != nullptr);
if (strcmp(metric->GetParams()->SymbolName, name) == 0) {
return mid;
Expand Down Expand Up @@ -520,8 +520,8 @@ class ClMetricCollector {
ClKernelInfoMap kernel_info_map_;

MetricDevice* device_ = nullptr;
md::IConcurrentGroup_1_5* group_ = nullptr;
md::IMetricSet_1_5* set_ = nullptr;
md::IConcurrentGroupLatest* group_ = nullptr;
md::IMetricSetLatest* set_ = nullptr;

decltype(clCreatePerfCountersCommandQueueINTEL)*
clCreatePerfCountersCommandQueue_ = nullptr;
Expand Down
5 changes: 1 addition & 4 deletions samples/ze_gemm/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ static float RunAndCheck(ze_kernel_handle_t kernel,
PTI_ASSERT(status == ZE_RESULT_SUCCESS);

ze_device_properties_t props{};
props.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
props.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2;
status = zeDeviceGetProperties(device, &props);
PTI_ASSERT(status == ZE_RESULT_SUCCESS);

Expand Down Expand Up @@ -248,9 +248,6 @@ static void Compute(ze_device_handle_t device,
}

int main(int argc, char* argv[]) {
utils::SetEnv("NEOReadDebugKeys", "1");
utils::SetEnv("UseCyclesPerSecondTimer", "1");

ze_result_t status = ZE_RESULT_SUCCESS;
status = zeInit(ZE_INIT_FLAG_GPU_ONLY);
PTI_ASSERT(status == ZE_RESULT_SUCCESS);
Expand Down
2 changes: 0 additions & 2 deletions samples/ze_hot_kernels/tool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ __declspec(dllexport)
#endif
void SetToolEnv() {
utils::SetEnv("ZE_ENABLE_TRACING_LAYER", "1");
utils::SetEnv("NEOReadDebugKeys", "1");
utils::SetEnv("UseCyclesPerSecondTimer", "1");
}

// Internal Tool Functionality ////////////////////////////////////////////////
Expand Down
13 changes: 7 additions & 6 deletions samples/ze_info/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,12 @@ void PrintDeviceInfo() {
"Number of devices " << device_list.size() << std::endl;

for (size_t j = 0; j < device_list.size(); ++j) {
ze_device_properties_t device_props{
ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES, };
ze_structure_type_t stype =
version >= ZE_API_VERSION_1_2 ?
ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2 :
ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;

ze_device_properties_t device_props{stype, };
status = zeDeviceGetProperties(device_list[j], &device_props);
PTI_ASSERT(status == ZE_RESULT_SUCCESS);

Expand Down Expand Up @@ -267,7 +271,7 @@ void PrintDeviceInfo() {
std::cout << std::setw(TEXT_WIDTH) << std::left <<
std::string() + TAB + "Timer Resolution " <<
device_props.timerResolution;
if (version >= ZE_API_VERSION_1_0) {
if (version < ZE_API_VERSION_1_2) {
std::cout << "ns";
} else {
std::cout << "clks";
Expand Down Expand Up @@ -373,9 +377,6 @@ int main(int argc, char *argv[]) {
}
}

utils::SetEnv("NEOReadDebugKeys", "1");
utils::SetEnv("UseCyclesPerSecondTimer", "1");

ze_result_t status = ZE_RESULT_SUCCESS;
status = zeInit(ZE_INIT_FLAG_GPU_ONLY);
PTI_ASSERT(status == ZE_RESULT_SUCCESS);
Expand Down
2 changes: 0 additions & 2 deletions samples/ze_metric_streamer/tool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,6 @@ __declspec(dllexport)
void SetToolEnv() {
utils::SetEnv("ZE_ENABLE_TRACING_LAYER", "1");
utils::SetEnv("ZET_ENABLE_METRICS", "1");
utils::SetEnv("NEOReadDebugKeys", "1");
utils::SetEnv("UseCyclesPerSecondTimer", "1");
}

// Internal Tool Functionality ////////////////////////////////////////////////
Expand Down
6 changes: 3 additions & 3 deletions tests/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
tools = [["gpuinfo", "-l", "-i", "-m"],
["sysmon", "-p", "-l", "-d"],
["onetrace",
"-c", "-h", "-d", "-v", "-t",
"-c", "-h", "-d", "-v", "-t", "-s",
"--kernels-per-tile",
"--chrome-call-logging",
"--chrome-device-timeline",
Expand All @@ -43,15 +43,15 @@
"--conditional-collection",
"cl", "ze", "omp"],
["cl_tracer",
"-c", "-h", "-d", "-v", "-t",
"-c", "-h", "-d", "-v", "-t", "-s",
"--chrome-call-logging",
"--chrome-device-timeline",
"--chrome-kernel-timeline",
"--chrome-device-stages",
"--conditional-collection",
"gpu", "dpc", "omp"],
["ze_tracer",
"-c", "-h", "-d", "-v", "-t",
"-c", "-h", "-d", "-v", "-t", "-s",
"--kernels-per-tile",
"--chrome-call-logging",
"--chrome-device-timeline",
Expand Down
6 changes: 5 additions & 1 deletion tests/tools/cl_tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def run(path, option):
if option == "gpu":
command = [file_name_prefix + "cl_tracer" + file_extention,\
"-h", "-d", "-t", app_file, "gpu", "1024", "1"]
elif option == "--conditional-collection":
elif option == "-v" or option == "--conditional-collection":
command = [file_name_prefix + "cl_tracer" + file_extention,\
"-d", option, app_file, "cpu", "1024", "1"]
else:
Expand Down Expand Up @@ -98,6 +98,10 @@ def main(option):
option = "-d"
if len(sys.argv) > 1 and sys.argv[1] == "-t":
option = "-t"
if len(sys.argv) > 1 and sys.argv[1] == "-s":
option = "-s"
if len(sys.argv) > 1 and sys.argv[1] == "-v":
option = "-v"
if len(sys.argv) > 1 and sys.argv[1] == "--chrome-call-logging":
option = "--chrome-call-logging"
if len(sys.argv) > 1 and sys.argv[1] == "--chrome-device-timeline":
Expand Down
6 changes: 5 additions & 1 deletion tests/tools/onetrace.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def run(path, option):
app_folder = utils.get_sample_executable_path("omp_gemm")
app_file = os.path.join(app_folder, "omp_gemm")
command = ["./onetrace", "-h", "-d", "-t", app_file, "gpu", "1024", "1"]
elif option == "--kernels-per-tile" or option == "--conditional-collection":
elif option == "-v" or option == "--kernels-per-tile" or option == "--conditional-collection":
app_folder = utils.get_sample_executable_path("dpc_gemm")
app_file = os.path.join(app_folder, "dpc_gemm")
command = ["./onetrace", "-d", option, app_file, "gpu", "1024", "1"]
Expand Down Expand Up @@ -84,6 +84,10 @@ def main(option):
option = "-d"
if len(sys.argv) > 1 and sys.argv[1] == "-t":
option = "-t"
if len(sys.argv) > 1 and sys.argv[1] == "-s":
option = "-s"
if len(sys.argv) > 1 and sys.argv[1] == "-v":
option = "-v"
if len(sys.argv) > 1 and sys.argv[1] == "--kernels-per-tile":
option = "--kernels-per-tile"
if len(sys.argv) > 1 and sys.argv[1] == "--chrome-call-logging":
Expand Down
6 changes: 5 additions & 1 deletion tests/tools/ze_tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def run(path, option):
app_folder = utils.get_sample_executable_path("omp_gemm")
app_file = os.path.join(app_folder, "omp_gemm")
command = ["./ze_tracer", "-h", "-d", "-t", app_file, "gpu", "1024", "1"]
elif option == "--kernels-per-tile" or option == "--conditional-collection":
elif option == "-v" or option == "--kernels-per-tile" or option == "--conditional-collection":
app_folder = utils.get_sample_executable_path("ze_gemm")
app_file = os.path.join(app_folder, "ze_gemm")
command = ["./ze_tracer", "-d", option, app_file, "1024", "1"]
Expand Down Expand Up @@ -77,6 +77,10 @@ def main(option):
option = "-d"
if len(sys.argv) > 1 and sys.argv[1] == "-t":
option = "-t"
if len(sys.argv) > 1 and sys.argv[1] == "-s":
option = "-s"
if len(sys.argv) > 1 and sys.argv[1] == "-v":
option = "-v"
if len(sys.argv) > 1 and sys.argv[1] == "--kernels-per-tile":
option = "--kernels-per-tile"
if len(sys.argv) > 1 and sys.argv[1] == "--chrome-call-logging":
Expand Down
Loading

0 comments on commit a386025

Please sign in to comment.