From df968c95687f15f871c9323d9325211669487bd2 Mon Sep 17 00:00:00 2001 From: avgorshk Date: Wed, 14 Apr 2021 11:37:51 +0300 Subject: [PATCH] Improve and rename gpuinfo, simplify cl_hot_functions sample --- README.md | 2 +- VERSION | 2 +- chapters/device_activity_tracing/OpenCL.md | 2 +- .../metrics_collection/MetricsDiscoveryAPI.md | 6 +- chapters/runtime_api_tracing/OpenCL.md | 8 +- samples/cl_hot_functions/README.md | 66 ++--- samples/cl_hot_functions/cl_api_collector.h | 205 --------------- samples/cl_hot_functions/tool.cc | 236 ++++++++++++------ tests/cl_hot_functions.py | 54 +--- tests/cl_tracer.py | 32 ++- tests/{gpu_info.py => gpuinfo.py} | 8 +- tests/onetrace.py | 44 +++- tests/run.py | 10 +- tests/ze_tracer.py | 34 ++- tools/gpu_info/README.md | 126 ---------- tools/{gpu_info => gpuinfo}/CMakeLists.txt | 12 +- tools/gpuinfo/README.md | 116 +++++++++ tools/{gpu_info => gpuinfo}/main.cc | 156 +++++------- 18 files changed, 488 insertions(+), 631 deletions(-) delete mode 100644 samples/cl_hot_functions/cl_api_collector.h rename tests/{gpu_info.py => gpuinfo.py} (93%) delete mode 100644 tools/gpu_info/README.md rename tools/{gpu_info => gpuinfo}/CMakeLists.txt (64%) create mode 100644 tools/gpuinfo/README.md rename tools/{gpu_info => gpuinfo}/main.cc (60%) diff --git a/README.md b/README.md index b58f8c9..810242d 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ You may obtain a copy of the License at https://opensource.org/licenses/MIT - [onetrace](tools/onetrace) - host and device tracing tool for OpenCL(TM) and Level Zero backends with support of DPC++ (both for CPU and GPU) and OpenMP* GPU offload; - [ze_tracer](tools/ze_tracer) - "Swiss army knife" for Level Zero API call tracing and profiling (former ze_intercept); - [cl_tracer](tools/cl_tracer) - "Swiss army knife" for OpenCL(TM) API call tracing and profiling; - - [gpu_info](tools/gpu_info) - provides basic information about the GPUs installed in a system, and the list of HW metrics one can collect for it; + - [gpuinfo](tools/gpuinfo) - provides basic information about the GPUs installed in a system, and the list of HW metrics one can collect for it; ## Sample Tools & Utilities - tools for OpenCL(TM), DPC++ (with OpenCL(TM) backend) and OpenMP* GPU offload (with OpenCL(TM) backend): diff --git a/VERSION b/VERSION index 9d26321..1db0ede 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.20.1 \ No newline at end of file +0.21.0 \ No newline at end of file diff --git a/chapters/device_activity_tracing/OpenCL.md b/chapters/device_activity_tracing/OpenCL.md index b8988fe..083b905 100644 --- a/chapters/device_activity_tracing/OpenCL.md +++ b/chapters/device_activity_tracing/OpenCL.md @@ -20,7 +20,7 @@ Intel(R) Xeon(R) Processor / Intel(R) Core(TM) Processor (CPU) Runtimes use `Que - Windows **Supported HW**: -- Intel(R) Processor Graphics GEN9+ +- Any **Needed Headers**: - OpenCL(TM) [headers](https://github.com/KhronosGroup/OpenCL-Headers) diff --git a/chapters/metrics_collection/MetricsDiscoveryAPI.md b/chapters/metrics_collection/MetricsDiscoveryAPI.md index e82a7cc..7eced43 100644 --- a/chapters/metrics_collection/MetricsDiscoveryAPI.md +++ b/chapters/metrics_collection/MetricsDiscoveryAPI.md @@ -295,5 +295,7 @@ LD_LIBRARY_PATH=$LD_LIBRARY_PATH: ./ [Compute Architecture Manuals](https://software.intel.com/en-us/articles/intel-graphics-developers-guides) to learn more on Intel(R) Processor Graphics architecture ## Samples -- [GPU Info](../../tools/gpu_info) -- [GPU Metrics for OpenCL(TM)](../../samples/cl_gpu_metrics) \ No newline at end of file +- [GPU Metrics for OpenCL(TM)](../../samples/cl_gpu_metrics) + +## Tools +- [GPU Info](../../tools/gpuinfo) \ No newline at end of file diff --git a/chapters/runtime_api_tracing/OpenCL.md b/chapters/runtime_api_tracing/OpenCL.md index 288e1ae..527a195 100644 --- a/chapters/runtime_api_tracing/OpenCL.md +++ b/chapters/runtime_api_tracing/OpenCL.md @@ -49,7 +49,7 @@ cl_int CL_API_CALL clGetTracingStateINTEL( - Windows **Supported HW**: -- Any +- Intel(R) Processor Graphics, Intel(R) Xeon(R) Processor and Intel(R) Core(TM) Processor **Needed Headers**: - OpenCL(TM) [headers](https://github.com/KhronosGroup/OpenCL-Headers) @@ -115,4 +115,8 @@ void Callback(cl_function_id fid, - [OpenCL(TM) Hot Functions](../../samples/cl_hot_functions) - [OpenCL(TM) Hot Kernels](../../samples/cl_hot_kernels) - [OpenCL(TM) Debug Info](../../samples/cl_debug_info) -- [OpenCL(TM) GPU Metrics](../../samples/cl_gpu_metrics) \ No newline at end of file +- [OpenCL(TM) GPU Metrics](../../samples/cl_gpu_metrics) + +## Tools +- [OpenCL(TM) Tracer](../../tools/cl_tracer) +- [Tracing and Profiling Tool for Data Parallel C++ (DPC++)](../../tools/onetrace) \ No newline at end of file diff --git a/samples/cl_hot_functions/README.md b/samples/cl_hot_functions/README.md index 471ebbd..5bc1267 100644 --- a/samples/cl_hot_functions/README.md +++ b/samples/cl_hot_functions/README.md @@ -1,43 +1,30 @@ # OpenCL(TM) Hot Functions ## Overview -This sample is a simple LD_PRELOAD based tool that allows to collect all called OpenCL(TM) API functions within an application along with their total execution time and call count. +This is a simple LD_PRELOAD based tool that allows to collect all called OpenCL(TM) API functions within an application along with their total execution time and call count for GPU device. As a result, table like the following will be printed. ``` -=== API Timing Results: === - -Total Execution Time (ns): 363687486 -Total API Time for CPU backend (ns): 524 -Total API Time for GPU backend (ns): 355355363 - -== CPU Backend: == - - Function, Calls, Time (ns), Time (%), Average (ns), Min (ns), Max (ns) -clGetDeviceIDs, 1, 524, 100.00, 524, 524, 524 - -== GPU Backend: == - - Function, Calls, Time (ns), Time (%), Average (ns), Min (ns), Max (ns) - clBuildProgram, 1, 173888026, 48.93, 173888026, 173888026, 173888026 - clFinish, 4, 172908147, 48.66, 43227036, 42711785, 44318785 - clEnqueueWriteBuffer, 8, 4636256, 1.30, 579532, 207825, 1864890 - clEnqueueReadBuffer, 4, 2051244, 0.58, 512811, 498662, 542971 - clEnqueueNDRangeKernel, 4, 1623139, 0.46, 405784, 236120, 609050 - clReleaseMemObject, 12, 95182, 0.03, 7931, 3525, 16436 - clCreateBuffer, 12, 81056, 0.02, 6754, 2511, 16990 - clSetKernelArg, 16, 24515, 0.01, 1532, 141, 7038 - clGetEventProfilingInfo, 8, 13139, 0.00, 1642, 103, 3288 - clCreateContext, 1, 12680, 0.00, 12680, 12680, 12680 - clReleaseProgram, 1, 9503, 0.00, 9503, 9503, 9503 - clCreateProgramWithSource, 1, 3880, 0.00, 3880, 3880, 3880 - clCreateKernel, 1, 2941, 0.00, 2941, 2941, 2941 - clReleaseKernel, 1, 1679, 0.00, 1679, 1679, 1679 - clGetKernelInfo, 4, 1617, 0.00, 404, 190, 552 -clCreateCommandQueueWithProperties, 1, 1388, 0.00, 1388, 1388, 1388 - clGetDeviceIDs, 2, 311, 0.00, 155, 138, 173 - clReleaseCommandQueue, 1, 270, 0.00, 270, 270, 270 - clGetDeviceInfo, 2, 227, 0.00, 113, 103, 124 - clReleaseContext, 1, 163, 0.00, 163, 163, 163 + Function, Calls, Time (ns), Average (ns) + clBuildProgram, 1, 183549198, 183549198 + clCreateBuffer, 12, 108285, 9023 +clCreateCommandQueueWithProperties, 1, 1265, 1265 + clCreateContext, 1, 9322, 9322 + clCreateKernel, 1, 3428, 3428 + clCreateProgramWithSource, 1, 3219, 3219 + clEnqueueNDRangeKernel, 4, 2237845, 559461 + clEnqueueReadBuffer, 4, 2358133, 589533 + clEnqueueWriteBuffer, 8, 5719781, 714972 + clFinish, 4, 174064236, 43516059 + clGetDeviceIDs, 2, 362, 181 + clGetDeviceInfo, 2, 354, 177 + clGetEventProfilingInfo, 8, 14198, 1774 + clGetKernelInfo, 4, 2411, 602 + clReleaseCommandQueue, 1, 1046, 1046 + clReleaseContext, 1, 173, 173 + clReleaseKernel, 1, 2741, 2741 + clReleaseMemObject, 12, 110922, 9243 + clReleaseProgram, 1, 11561, 11561 + clSetKernelArg, 16, 75282, 4705 ``` ## Supported OS - Linux @@ -48,8 +35,7 @@ clCreateCommandQueueWithProperties, 1, 1388, 0.00, - [Git](https://git-scm.com/) (version 1.8 and above) - [Python](https://www.python.org/) (version 2.7 and above) - [OpenCL(TM) ICD Loader](https://github.com/KhronosGroup/OpenCL-ICD-Loader) -- [Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver](https://github.com/intel/compute-runtime) to run on GPU -- [Intel(R) Xeon(R) Processor / Intel(R) Core(TM) Processor (CPU) Runtimes](https://software.intel.com/en-us/articles/opencl-drivers#cpu-section) to run on CPU +- [Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver](https://github.com/intel/compute-runtime) ## Build and Run ### Linux @@ -65,10 +51,9 @@ Use this command line to run the tool: ```sh ./cl_hot_functions ``` -One may use [cl_gemm](../cl_gemm) or [dpc_gemm](../dpc_gemm) as target application: +One may use [cl_gemm](../cl_gemm) as target application: ```sh ./cl_hot_functions ../../cl_gemm/build/cl_gemm -./cl_hot_functions ../../dpc_gemm/build/dpc_gemm cpu ``` ### Windows Use Microsoft* Visual Studio x64 command prompt to run the following commands and build the sample: @@ -83,8 +68,7 @@ Use this command line to run the tool: ```sh cl_hot_functions.exe ``` -One may use [cl_gemm](../cl_gemm) or [dpc_gemm](../dpc_gemm) as target application: +One may use [cl_gemm](../cl_gemm) as target application: ```sh cl_hot_functions.exe ..\..\cl_gemm\build\cl_gemm.exe -cl_hot_functions.exe ..\..\dpc_gemm\build\dpc_gemm.exe cpu ``` \ No newline at end of file diff --git a/samples/cl_hot_functions/cl_api_collector.h b/samples/cl_hot_functions/cl_api_collector.h deleted file mode 100644 index 18ac7f8..0000000 --- a/samples/cl_hot_functions/cl_api_collector.h +++ /dev/null @@ -1,205 +0,0 @@ -//============================================================== -// Copyright (C) Intel Corporation -// -// SPDX-License-Identifier: MIT -// ============================================================= - -#ifndef PTI_SAMPLES_CL_HOT_FUNCTIONS_CL_API_COLLECTOR_H_ -#define PTI_SAMPLES_CL_HOT_FUNCTIONS_CL_API_COLLECTOR_H_ - -#include -#include -#include -#include -#include -#include - -#include "cl_api_tracer.h" -#include "cl_utils.h" - -struct ClFunction { - uint64_t total_time; - uint64_t min_time; - uint64_t max_time; - uint64_t call_count; - - bool operator>(const ClFunction& r) const { - if (total_time != r.total_time) { - return total_time > r.total_time; - } - return call_count > r.call_count; - } - - bool operator!=(const ClFunction& r) const { - if (total_time == r.total_time) { - return call_count != r.call_count; - } - return true; - } -}; - -using ClFunctionInfoMap = std::map; - -class ClApiCollector { - public: // User Interface - static ClApiCollector* Create(cl_device_id device) { - PTI_ASSERT(device != nullptr); - - ClApiCollector* collector = new ClApiCollector(); - PTI_ASSERT(collector != nullptr); - - ClApiTracer* tracer = new ClApiTracer(device, Callback, collector); - if (tracer == nullptr || !tracer->IsValid()) { - std::cerr << "[WARNING] Unable to create OpenCL tracer " << - "for target device" << std::endl; - if (tracer != nullptr) { - delete tracer; - delete collector; - } - return nullptr; - } - - collector->EnableTracing(tracer); - return collector; - } - - ~ClApiCollector() { - if (tracer_ != nullptr) { - delete tracer_; - } - } - - void DisableTracing() { - PTI_ASSERT(tracer_ != nullptr); - bool disabled = tracer_->Disable(); - PTI_ASSERT(disabled); - } - - const ClFunctionInfoMap& GetFunctionInfoMap() const { - return function_info_map_; - } - - ClApiCollector(const ClApiCollector& copy) = delete; - ClApiCollector& operator=(const ClApiCollector& copy) = delete; - - static void PrintFunctionsTable(const ClFunctionInfoMap& function_info_map) { - std::set< std::pair, - utils::Comparator > sorted_list( - function_info_map.begin(), function_info_map.end()); - - uint64_t total_duration = 0; - size_t max_name_length = kFunctionLength; - for (auto& value : sorted_list) { - total_duration += value.second.total_time; - if (value.first.size() > max_name_length) { - max_name_length = value.first.size(); - } - } - - if (total_duration == 0) { - return; - } - - std::cerr << std::setw(max_name_length) << "Function" << "," << - std::setw(kCallsLength) << "Calls" << "," << - std::setw(kTimeLength) << "Time (ns)" << "," << - std::setw(kPercentLength) << "Time (%)" << "," << - std::setw(kTimeLength) << "Average (ns)" << "," << - std::setw(kTimeLength) << "Min (ns)" << "," << - std::setw(kTimeLength) << "Max (ns)" << std::endl; - - for (auto& value : sorted_list) { - const std::string& function = value.first; - uint64_t call_count = value.second.call_count; - uint64_t duration = value.second.total_time; - uint64_t avg_duration = duration / call_count; - uint64_t min_duration = value.second.min_time; - uint64_t max_duration = value.second.max_time; - float percent_duration = 100.0f * duration / total_duration; - std::cerr << std::setw(max_name_length) << function << "," << - std::setw(kCallsLength) << call_count << "," << - std::setw(kTimeLength) << duration << "," << - std::setw(kPercentLength) << std::setprecision(2) << - std::fixed << percent_duration << "," << - std::setw(kTimeLength) << avg_duration << "," << - std::setw(kTimeLength) << min_duration << "," << - std::setw(kTimeLength) << max_duration << std::endl; - } - } - - private: // Implementation Details - ClApiCollector() {} - - void EnableTracing(ClApiTracer* tracer) { - PTI_ASSERT(tracer != nullptr); - tracer_ = tracer; - - for (int id = 0; id < CL_FUNCTION_COUNT; ++id) { - bool set = tracer_->SetTracingFunction(static_cast(id)); - PTI_ASSERT(set); - } - - bool enabled = tracer_->Enable(); - PTI_ASSERT(enabled); - } - - uint64_t GetTimestamp() const { - std::chrono::duration timestamp = - std::chrono::steady_clock::now() - base_time_; - return timestamp.count(); - } - - void AddFunctionTime(const std::string& name, uint64_t time) { - const std::lock_guard lock(lock_); - if (function_info_map_.count(name) == 0) { - function_info_map_[name] = {time, time, time, 1}; - } else { - ClFunction& function = function_info_map_[name]; - function.total_time += time; - if (time < function.min_time) { - function.min_time = time; - } - if (time > function.max_time) { - function.max_time = time; - } - ++function.call_count; - } - } - - private: // Callbacks - static void Callback( - cl_function_id function, - cl_callback_data* callback_data, - void* user_data) { - ClApiCollector* collector = reinterpret_cast(user_data); - PTI_ASSERT(collector != nullptr); - PTI_ASSERT(callback_data != nullptr); - PTI_ASSERT(callback_data->correlationData != nullptr); - - if (callback_data->site == CL_CALLBACK_SITE_ENTER) { - uint64_t& start_time = *reinterpret_cast( - callback_data->correlationData); - start_time = collector->GetTimestamp(); - } else { - uint64_t end_time = collector->GetTimestamp(); - uint64_t& start_time = *reinterpret_cast( - callback_data->correlationData); - collector->AddFunctionTime( - callback_data->functionName, end_time - start_time); - } - } - - private: // Data - ClApiTracer* tracer_ = nullptr; - std::chrono::time_point base_time_; - - std::mutex lock_; - ClFunctionInfoMap function_info_map_; - - static const uint32_t kFunctionLength = 10; - static const uint32_t kCallsLength = 12; - static const uint32_t kTimeLength = 20; - static const uint32_t kPercentLength = 10; -}; - -#endif // PTI_SAMPLES_CL_HOT_FUNCTIONS_CL_API_COLLECTOR_H_ \ No newline at end of file diff --git a/samples/cl_hot_functions/tool.cc b/samples/cl_hot_functions/tool.cc index 9bfd749..036242d 100644 --- a/samples/cl_hot_functions/tool.cc +++ b/samples/cl_hot_functions/tool.cc @@ -13,11 +13,25 @@ #include #include -#include "cl_api_collector.h" +#include -static ClApiCollector* cpu_collector = nullptr; -static ClApiCollector* gpu_collector = nullptr; -static std::chrono::steady_clock::time_point start; +#include "cl_utils.h" +#include "pti_assert.h" + +// Pointers to tracing functions +static decltype(clCreateTracingHandleINTEL)* clCreateTracingHandle = nullptr; +static decltype(clSetTracingPointINTEL)* clSetTracingPoint = nullptr; +static decltype(clDestroyTracingHandleINTEL)* clDestroyTracingHandle = nullptr; +static decltype(clEnableTracingINTEL)* clEnableTracing = nullptr; +static decltype(clDisableTracingINTEL)* clDisableTracing = nullptr; + +// Tracing handle +static cl_tracing_handle tracer = nullptr; + +// Function maps & mutex +static std::map function_time_map; +static std::map function_count_map; +std::mutex lock; // External Tool Interface //////////////////////////////////////////////////// @@ -47,110 +61,172 @@ void SetToolEnv() {} // Internal Tool Functionality //////////////////////////////////////////////// -static uint64_t CalculateTotalTime(ClApiCollector* collector) { - PTI_ASSERT(collector != nullptr); - uint64_t total_duration = 0; - - const ClFunctionInfoMap& function_info_map = collector->GetFunctionInfoMap(); - if (function_info_map.size() != 0) { - for (auto& value : function_info_map) { - total_duration += value.second.total_time; - } - } - - return total_duration; +static bool LoadTracingFunctions(cl_device_id device) { + PTI_ASSERT(device != nullptr); + + cl_int status = CL_SUCCESS; + + cl_platform_id platform = nullptr; + status = clGetDeviceInfo( + device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, nullptr); + PTI_ASSERT(status == CL_SUCCESS); + + clCreateTracingHandle = + reinterpret_cast( + clGetExtensionFunctionAddressForPlatform( + platform, "clCreateTracingHandleINTEL")); + clSetTracingPoint = + reinterpret_cast( + clGetExtensionFunctionAddressForPlatform( + platform, "clSetTracingPointINTEL")); + clDestroyTracingHandle = + reinterpret_cast( + clGetExtensionFunctionAddressForPlatform( + platform, "clDestroyTracingHandleINTEL")); + clEnableTracing = + reinterpret_cast( + clGetExtensionFunctionAddressForPlatform( + platform, "clEnableTracingINTEL")); + clDisableTracing = + reinterpret_cast( + clGetExtensionFunctionAddressForPlatform( + platform, "clDisableTracingINTEL")); + + if (clCreateTracingHandle == nullptr || + clSetTracingPoint == nullptr || + clDestroyTracingHandle == nullptr || + clEnableTracing == nullptr || + clDisableTracing == nullptr) { + return false; + } + + return true; } -static void PrintDeviceTable( - ClApiCollector* collector, const char* device_type) { - PTI_ASSERT(collector != nullptr); - PTI_ASSERT(device_type != nullptr); - - uint64_t total_duration = CalculateTotalTime(collector); - if (total_duration > 0) { - std::cerr << std::endl; - std::cerr << "== " << device_type << " Backend: ==" << std::endl; - std::cerr << std::endl; - - const ClFunctionInfoMap& function_info_map = collector->GetFunctionInfoMap(); - PTI_ASSERT(function_info_map.size() > 0); - ClApiCollector::PrintFunctionsTable(function_info_map); +static void Callback( + cl_function_id function, + cl_callback_data* callback_data, + void* user_data) { + PTI_ASSERT(callback_data != nullptr); + PTI_ASSERT(callback_data->correlationData != nullptr); + + // Get current time point + std::chrono::duration time = + std::chrono::steady_clock::now().time_since_epoch(); + + if (callback_data->site == CL_CALLBACK_SITE_ENTER) { // Before the function + uint64_t& start_time = *reinterpret_cast( + callback_data->correlationData); + start_time = time.count(); + } else { // After the function + uint64_t end_time = time.count(); + uint64_t& start_time = *reinterpret_cast( + callback_data->correlationData); + + { + const std::lock_guard guard(lock); + + if (function_time_map.count(callback_data->functionName) == 0) { + function_time_map[callback_data->functionName] = + end_time - start_time; + } else { + function_time_map[callback_data->functionName] += + end_time - start_time; + } + + if (function_count_map.count(callback_data->functionName) == 0) { + function_count_map[callback_data->functionName] = 1; + } else { + function_count_map[callback_data->functionName] += 1; + } + } } } static void PrintResults() { - if (cpu_collector == nullptr && gpu_collector == nullptr) { + if (function_time_map.empty()) { return; } - std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); - std::chrono::duration time = end - start; - - std::cerr << std::endl; - std::cerr << "=== API Timing Results: ===" << std::endl; - std::cerr << std::endl; - std::cerr << "Total Execution Time (ns): " << time.count() << std::endl; - - if (cpu_collector != nullptr) { - std::cerr << "Total API Time for CPU backend (ns): " << - CalculateTotalTime(cpu_collector) << std::endl; - } - if (gpu_collector != nullptr) { - std::cerr << "Total API Time for GPU backend (ns): " << - CalculateTotalTime(gpu_collector) << std::endl; + size_t function_length = 0; + for (auto& item : function_time_map) { + auto& name = item.first; + if (name.size() > function_length) { + function_length = name.size(); + } } + PTI_ASSERT(function_length > 0); - if (cpu_collector != nullptr) { - PrintDeviceTable(cpu_collector, "CPU"); - } - if (gpu_collector != nullptr) { - PrintDeviceTable(gpu_collector, "GPU"); + std::cerr << std::endl; + std::cerr << std::setw(function_length) << "Function" << "," << + std::setw(12) << "Calls" << "," << + std::setw(20) << "Time (ns)" << "," << + std::setw(20) << "Average (ns)" << std::endl; + + for (auto& item : function_time_map) { + auto& name = item.first; + uint64_t time = item.second; + PTI_ASSERT(function_count_map.count(name) == 1); + uint64_t count = function_count_map[name]; + std::cerr << std::setw(function_length) << name << "," << + std::setw(12) << count << "," << + std::setw(20) << time << "," << + std::setw(20) << time / count << std::endl; } - std::cerr << std::endl; } // Internal Tool Interface //////////////////////////////////////////////////// void EnableProfiling() { - cl_device_id cpu_device = utils::cl::GetIntelDevice(CL_DEVICE_TYPE_CPU); - cl_device_id gpu_device = utils::cl::GetIntelDevice(CL_DEVICE_TYPE_GPU); - if (cpu_device == nullptr && gpu_device == nullptr) { - std::cerr << "[WARNING] Unable to find device for tracing" << std::endl; + cl_int status = CL_SUCCESS; + + // Get GPU device + cl_device_id device = utils::cl::GetIntelDevice(CL_DEVICE_TYPE_GPU); + if (device == nullptr) { + std::cerr << + "[WARNING] Unable to find GPU device for tracing" << std::endl; return; } - if (gpu_device == nullptr) { - std::cerr << "[WARNING] Unable to find GPU device for tracing" << - std::endl; - } - if (cpu_device == nullptr) { - std::cerr << "[WARNING] Unable to find CPU device for tracing" << - std::endl; + // Get pointers for tracing functions + bool loaded = LoadTracingFunctions(device); + if (!loaded) { + std::cerr << + "[WARNING] Unable to load pointers for tracing functions" << std::endl; + return; } - if (cpu_device != nullptr) { - cpu_collector = ClApiCollector::Create(cpu_device); - } - if (gpu_device != nullptr) { - gpu_collector = ClApiCollector::Create(gpu_device); + // Create tracing handle + status = clCreateTracingHandle(device, Callback, nullptr, &tracer); + PTI_ASSERT(status == CL_SUCCESS); + + // Switch on tracing for all of the functions + for (int fid = 0; fid < CL_FUNCTION_COUNT; ++fid) { + status = clSetTracingPoint( + tracer, static_cast(fid), CL_TRUE); + PTI_ASSERT(status == CL_SUCCESS); } - start = std::chrono::steady_clock::now(); + // Enable tracing + status = clEnableTracing(tracer); + PTI_ASSERT(status == CL_SUCCESS); } void DisableProfiling() { - if (cpu_collector != nullptr) { - cpu_collector->DisableTracing(); - } - if (gpu_collector != nullptr) { - gpu_collector->DisableTracing(); + if (tracer == nullptr) { + return; } + + cl_int status = CL_SUCCESS; + + // Disable tracing + status = clDisableTracing(tracer); + PTI_ASSERT(status == CL_SUCCESS); + + // Destroy tracing handle + status = clDestroyTracingHandle(tracer); + PTI_ASSERT(status == CL_SUCCESS); + PrintResults(); - if (cpu_collector != nullptr) { - delete cpu_collector; - } - if (gpu_collector != nullptr) { - delete gpu_collector; - } } \ No newline at end of file diff --git a/tests/cl_hot_functions.py b/tests/cl_hot_functions.py index 24a9521..f0bffc3 100644 --- a/tests/cl_hot_functions.py +++ b/tests/cl_hot_functions.py @@ -3,8 +3,6 @@ import sys import cl_gemm -import dpc_gemm -import omp_gemm import utils def config(path): @@ -31,7 +29,7 @@ def parse(output): total_time = 0 for line in lines: items = line.split(",") - if len(items) != 7 or line.find("Time (ns)") != -1: + if len(items) != 4 or line.find("Time (ns)") != -1: continue function_name = items[0].strip() call_count = int(items[1].strip()) @@ -43,25 +41,11 @@ def parse(output): return False return True -def run(path, option): - if option == "dpc": - app_folder = utils.get_sample_build_path("dpc_gemm") - app_file = os.path.join(app_folder, "dpc_gemm") - option = "cpu" - p = subprocess.Popen(["./cl_hot_functions", app_file, option, "1024", "1"],\ - cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) - elif option == "omp": - app_folder = utils.get_sample_build_path("omp_gemm") - app_file = os.path.join(app_folder, "omp_gemm") - option = "gpu" - e = utils.add_env(None, "LIBOMPTARGET_PLUGIN", "OPENCL") - p = subprocess.Popen(["./cl_hot_functions", app_file, option, "1024", "1"],\ - env = e, cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) - else: - app_folder = utils.get_sample_build_path("cl_gemm") - app_file = os.path.join(app_folder, "cl_gemm") - p = subprocess.Popen(["./cl_hot_functions", app_file, option, "1024", "1"],\ - cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) +def run(path): + app_folder = utils.get_sample_build_path("cl_gemm") + app_file = os.path.join(app_folder, "cl_gemm") + p = subprocess.Popen(["./cl_hot_functions", app_file, "gpu", "1024", "1"],\ + cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) if not stdout: return stderr @@ -73,36 +57,20 @@ def run(path, option): def main(option): path = utils.get_sample_build_path("cl_hot_functions") - if option == "dpc": - log = dpc_gemm.main("cpu") - if log: - return log - elif option == "omp": - log = omp_gemm.main("gpu") - if log: - return log - else: - log = cl_gemm.main(option) - if log: - return log + log = cl_gemm.main("gpu") + if log: + return log log = config(path) if log: return log log = build(path) if log: return log - log = run(path, option) + log = run(path) if log: return log if __name__ == "__main__": - option = "gpu" - if len(sys.argv) > 1 and sys.argv[1] == "cpu": - option = "cpu" - if len(sys.argv) > 1 and sys.argv[1] == "dpc": - option = "dpc" - if len(sys.argv) > 1 and sys.argv[1] == "omp": - option = "omp" - log = main(option) + log = main(None) if log: print(log) \ No newline at end of file diff --git a/tests/cl_tracer.py b/tests/cl_tracer.py index 283939f..9dc910d 100644 --- a/tests/cl_tracer.py +++ b/tests/cl_tracer.py @@ -3,6 +3,7 @@ import sys import cl_gemm +import dpc_gemm import utils def config(path): @@ -25,20 +26,37 @@ def build(path): return None def run(path, option): - app_folder = utils.get_sample_build_path("cl_gemm") - app_file = os.path.join(app_folder, "cl_gemm") - p = subprocess.Popen(["./cl_tracer", option, app_file, "cpu", "1024", "1"],\ - cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + if option == "dpc": + app_folder = utils.get_sample_build_path("dpc_gemm") + app_file = os.path.join(app_folder, "dpc_gemm") + p = subprocess.Popen(["./cl_tracer", "-h", "-d", app_file, "cpu", "1024", "1"],\ + cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + else: + app_folder = utils.get_sample_build_path("cl_gemm") + app_file = os.path.join(app_folder, "cl_gemm") + if option == "gpu": + p = subprocess.Popen(["./cl_tracer", "-h", "-d", app_file, "gpu", "1024", "1"],\ + cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + else: + p = subprocess.Popen(["./cl_tracer", option, app_file, "cpu", "1024", "1"],\ + cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) if not stderr: return stdout if stdout.find(" CORRECT") == -1: return stdout + if stderr.find("WARNING") != -1: + return stderr return None def main(option): path = utils.get_tool_build_path("cl_tracer") - log = cl_gemm.main("cpu") + if option == "dpc": + log = dpc_gemm.main("cpu") + elif option == "gpu": + log = cl_gemm.main("gpu") + else: + log = cl_gemm.main("cpu") if log: return log log = config(path) @@ -65,6 +83,10 @@ def main(option): option = "--chrome-device-timeline" if len(sys.argv) > 1 and sys.argv[1] == "--chrome-device-stages": option = "--chrome-device-stages" + if len(sys.argv) > 1 and sys.argv[1] == "gpu": + option = "gpu" + if len(sys.argv) > 1 and sys.argv[1] == "dpc": + option = "dpc" log = main(option) if log: print(log) \ No newline at end of file diff --git a/tests/gpu_info.py b/tests/gpuinfo.py similarity index 93% rename from tests/gpu_info.py rename to tests/gpuinfo.py index 4ef227e..b063d6b 100644 --- a/tests/gpu_info.py +++ b/tests/gpuinfo.py @@ -36,7 +36,7 @@ def parse(output, option): lines = output.split("\n") total_values = 0 for line in lines: - if line.find("Device Information") != -1: + if line.find("Device") == 0: continue items = line.strip().split(":") if len(items) == 2: @@ -71,7 +71,7 @@ def parse(output, option): return False if total_sets <= total_groups: return False - if total_metrics <= total_sets: + if total_metrics <= total_sets: return False else: return False @@ -79,7 +79,7 @@ def parse(output, option): return True def run(path, option): - p = subprocess.Popen(["./gpu_info", option],\ + p = subprocess.Popen(["./gpuinfo", option],\ cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) if stderr: @@ -89,7 +89,7 @@ def run(path, option): return None def main(option): - path = utils.get_tool_build_path("gpu_info") + path = utils.get_tool_build_path("gpuinfo") log = config(path) if log: return log diff --git a/tests/onetrace.py b/tests/onetrace.py index e9065a8..bf08e10 100644 --- a/tests/onetrace.py +++ b/tests/onetrace.py @@ -2,7 +2,10 @@ import subprocess import sys +import cl_gemm import dpc_gemm +import omp_gemm +import ze_gemm import utils def config(path): @@ -25,20 +28,45 @@ def build(path): return None def run(path, option): - app_folder = utils.get_sample_build_path("dpc_gemm") - app_file = os.path.join(app_folder, "dpc_gemm") - p = subprocess.Popen(["./onetrace", option, app_file, "gpu", "1024", "1"],\ - cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + if option == "cl": + app_folder = utils.get_sample_build_path("cl_gemm") + app_file = os.path.join(app_folder, "cl_gemm") + p = subprocess.Popen(["./onetrace", "-h", "-d", app_file, "gpu", "1024", "1"],\ + cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + elif option == "ze": + app_folder = utils.get_sample_build_path("ze_gemm") + app_file = os.path.join(app_folder, "ze_gemm") + p = subprocess.Popen(["./onetrace", "-h", "-d", app_file, "1024", "1"],\ + cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + elif option == "omp": + app_folder = utils.get_sample_build_path("omp_gemm") + app_file = os.path.join(app_folder, "omp_gemm") + p = subprocess.Popen(["./onetrace", "-h", "-d", app_file, "gpu", "1024", "1"],\ + cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + else: + app_folder = utils.get_sample_build_path("dpc_gemm") + app_file = os.path.join(app_folder, "dpc_gemm") + p = subprocess.Popen(["./onetrace", option, app_file, "gpu", "1024", "1"],\ + cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) if not stderr: return stdout if stdout.find(" CORRECT") == -1: return stdout + if stderr.find("WARNING") != -1: + return stderr return None def main(option): path = utils.get_tool_build_path("onetrace") - log = dpc_gemm.main("gpu") + if option == "cl": + log = cl_gemm.main("gpu") + elif option == "ze": + log = ze_gemm.main(None) + elif option == "omp": + log = omp_gemm.main("gpu") + else: + log = dpc_gemm.main("gpu") if log: return log log = config(path) @@ -65,6 +93,12 @@ def main(option): option = "--chrome-device-timeline" if len(sys.argv) > 1 and sys.argv[1] == "--chrome-device-stages": option = "--chrome-device-stages" + if len(sys.argv) > 1 and sys.argv[1] == "cl": + option = "cl" + if len(sys.argv) > 1 and sys.argv[1] == "ze": + option = "ze" + if len(sys.argv) > 1 and sys.argv[1] == "omp": + option = "omp" log = main(option) if log: print(log) \ No newline at end of file diff --git a/tests/run.py b/tests/run.py index c629f52..1e4faea 100644 --- a/tests/run.py +++ b/tests/run.py @@ -11,7 +11,7 @@ ["cl_gemm_itt", "gpu", "cpu"], ["cl_debug_info", None], ["cl_gpu_metrics", None], - ["cl_hot_functions", "gpu", "cpu", "dpc", "omp"], + ["cl_hot_functions", None], ["cl_hot_kernels", "gpu", "cpu", "dpc", "omp"], ["gpu_inst_count", "cl", "ze", "dpc"], ["gpu_perfmon_read", "cl", "ze", "dpc"], @@ -30,10 +30,10 @@ ["dpc_gemm", "gpu", "cpu", "host"], ["dpc_info", "-a", "-l"]] -tools = [["gpu_info", "-l", "-i", "-m"], - ["onetrace", "-c", "-h", "-d", "-t", "--chrome-device-timeline", "--chrome-call-logging", "--chrome-device-stages"], - ["cl_tracer", "-c", "-h", "-d", "-t", "--chrome-device-timeline", "--chrome-call-logging", "--chrome-device-stages"], - ["ze_tracer", "-c", "-h", "-d", "-t", "--chrome-device-timeline", "--chrome-call-logging", "--chrome-device-stages"]] +tools = [["gpuinfo", "-l", "-i", "-m"], + ["onetrace", "-c", "-h", "-d", "-t", "--chrome-device-timeline", "--chrome-call-logging", "--chrome-device-stages", "cl", "ze", "omp"], + ["cl_tracer", "-c", "-h", "-d", "-t", "--chrome-device-timeline", "--chrome-call-logging", "--chrome-device-stages", "gpu", "dpc"], + ["ze_tracer", "-c", "-h", "-d", "-t", "--chrome-device-timeline", "--chrome-call-logging", "--chrome-device-stages", "dpc", "omp"]] def remove_python_cache(path): files = os.listdir(path) diff --git a/tests/ze_tracer.py b/tests/ze_tracer.py index 4d910ee..1bec083 100644 --- a/tests/ze_tracer.py +++ b/tests/ze_tracer.py @@ -2,6 +2,8 @@ import subprocess import sys +import dpc_gemm +import omp_gemm import ze_gemm import utils @@ -25,20 +27,38 @@ def build(path): return None def run(path, option): - app_folder = utils.get_sample_build_path("ze_gemm") - app_file = os.path.join(app_folder, "ze_gemm") - p = subprocess.Popen(["./ze_tracer", option, app_file, "1024", "1"],\ - cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + if option == "dpc": + app_folder = utils.get_sample_build_path("dpc_gemm") + app_file = os.path.join(app_folder, "dpc_gemm") + p = subprocess.Popen(["./ze_tracer", "-h", "-d", app_file, "gpu", "1024", "1"],\ + cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + elif option == "omp": + app_folder = utils.get_sample_build_path("omp_gemm") + app_file = os.path.join(app_folder, "omp_gemm") + p = subprocess.Popen(["./ze_tracer", "-h", "-d", app_file, "gpu", "1024", "1"],\ + cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + else: + app_folder = utils.get_sample_build_path("ze_gemm") + app_file = os.path.join(app_folder, "ze_gemm") + p = subprocess.Popen(["./ze_tracer", option, app_file, "1024", "1"],\ + cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) if not stderr: return stdout if stdout.find(" CORRECT") == -1: return stdout + if stderr.find("WARNING") != -1: + return stderr return None def main(option): path = utils.get_tool_build_path("ze_tracer") - log = ze_gemm.main(None) + if option == "dpc": + log = dpc_gemm.main("gpu") + elif option == "omp": + log = omp_gemm.main("gpu") + else: + log = ze_gemm.main(None) if log: return log log = config(path) @@ -65,6 +85,10 @@ def main(option): option = "--chrome-device-timeline" if len(sys.argv) > 1 and sys.argv[1] == "--chrome-device-stages": option = "--chrome-device-stages" + if len(sys.argv) > 1 and sys.argv[1] == "dpc": + option = "dpc" + if len(sys.argv) > 1 and sys.argv[1] == "omp": + option = "omp" log = main(option) if log: print(log) \ No newline at end of file diff --git a/tools/gpu_info/README.md b/tools/gpu_info/README.md deleted file mode 100644 index 446a890..0000000 --- a/tools/gpu_info/README.md +++ /dev/null @@ -1,126 +0,0 @@ -# GPU Info -## Overview -This sample application queries the information about the Intel(R) Processor Graphics device, that includes general GPU properties and hardware metrics, available to collect through [Intel(R) Metrics Discovery Application Programming Interface](https://github.com/intel/metrics-discovery). - -This application could be used as the test to check if Intel(R) Metrics Discovery Application Programming Interface library is available on the system. -The following options are supported: -``` -Usage: ./gpu_info[.exe] -Options: ---list [-l] Print list of available devices ---info [-i] Print general device info ---metrics [-m] Print list of available mertrics ---device [-d] Target device id ---subdevice [-s] Target subdevice id -``` - -Expected output should be the following: -* In case of printing the list of available GPU devices (`-l`): - ``` - Device 0: Intel(R) Iris(R) Plus Graphics 655 (CFL GT3) - ``` -* In case of retrieving device information (`-d`) for device 0 and subdevice 0: - ``` - Device (0, 0): - Device Name: Intel(R) HD Graphics 630 (Kaby Lake GT2) - EuCoresTotalCount: 24 - EuCoresPerSubsliceCount: 8 - EuSubslicesTotalCount: 3 - EuSubslicesPerSliceCount: 3 - EuSlicesTotalCount: 1 - EuThreadsCount: 7 - SubsliceMask: 7 - SliceMask: 1 - SamplersTotalCount: 3 - GpuMinFrequencyMHz: 350 - GpuMaxFrequencyMHz: 1150 - GpuCurrentFrequencyMHz: 350 - PciDeviceId: 22802 - SkuRevisionId: 4 - PlatformIndex: 12 - ApertureSize: 0 - NumberOfRenderOutputUnits: 4 - NumberOfShadingUnits: 28 - OABufferMinSize: 16777216 - OABufferMaxSize: 16777216 - GpuTimestampFrequency: 12000000 - MaxTimestamp: 357913941250 - ``` -* In case of retrieving metrics information (`-m`) for device 0 and subdevice 0: - ``` - Device (0, 0): - Metric Group 0: OcclusionQueryStats - Metric Set 0: RenderedPixelsStats (Rendered Pixels Statistics) - Metric 0: OcclusionQueryStats / RenderedPixelsStats / PixelsRendered (Depth passed pixels) - Metric Set 1: RenderedFragmentsStats (Rendered Fragments Statistics) - Metric 0: OcclusionQueryStats / RenderedFragmentsStats / PixelsRendered (Depth passed fragments) - ... - Metric Group 3: OA - Metric Set 0: RenderBasic (Render Metrics Basic Gen9) - Metric 0: OA / RenderBasic / GpuTime (GPU Time Elapsed) - Metric 1: OA / RenderBasic / GpuCoreClocks (GPU Core Clocks) - Metric 2: OA / RenderBasic / AvgGpuCoreFrequencyMHz (AVG GPU Core Frequency) - ... - Metric 51: OA / RenderBasic / SamplerBottleneck (Samplers Bottleneck) - Info 52: OA / RenderBasic / QueryBeginTime (Query Begin Time) - Info 53: OA / RenderBasic / CoreFrequencyMHz (GPU Core Frequency) - Info 54: OA / RenderBasic / EuSliceFrequencyMHz (EU Slice Frequency) - Info 55: OA / RenderBasic / ReportReason (Report Reason) - Info 56: OA / RenderBasic / ContextId (Context ID) - Info 57: OA / RenderBasic / CoreFrequencyChanged (GPU Core Frequency Changed) - Info 58: OA / RenderBasic / QuerySplitOccurred (Query Split Occurred) - Info 59: OA / RenderBasic / ReportId (Query report id) - Info 60: OA / RenderBasic / ReportsCount (Query reports count) - Info 61: OA / RenderBasic / OverrunOccured (Query Overrun Occurred) - Metric Set 1: ComputeBasic (Compute Metrics Basic Gen9) - Metric 0: OA / ComputeBasic / GpuTime (GPU Time Elapsed) - ... - Info 48: OA / ComputeBasic / OverrunOccured (Query Overrun Occurred) - ... - ``` -## Supported OS -- Linux -- Windows (*under development*) - -## Prerequisites -- [CMake](https://cmake.org/) (version 3.12 and above) -- [Git](https://git-scm.com/) (version 1.8 and above) -- [Python](https://www.python.org/) (version 2.7 and above) -- [Intel(R) Metrics Discovery Application Programming Interface](https://github.com/intel/metrics-discovery) - -## Build and Run -### Linux -Run the following commands to build the sample: -```sh -cd /tools/gpu_info -mkdir build -cd build -cmake -DCMAKE_BUILD_TYPE=Release .. -make -``` -Use this command line to run the utility: -```sh -./gpu_info -``` -Since Intel(R) Metrics Discovery Application Programming Interface library is loaded at runtime, one may need to set its path explicitly, e.g.: -```sh -LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib ./gpu_info -``` -### Windows -Use Microsoft* Visual Studio x64 command prompt to run the following commands and build the sample: -```sh -cd \tools\gpu_info -mkdir build -cd build -cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=Release .. -nmake -``` -Use this command line to run the application: -```sh -gpu_info.exe -``` -Since Intel(R) Metrics Discovery Application Programming Interface library is loaded at runtime, one may need to set its path explicitly (see the output of cmake), e.g.: -```sh -set PATH=%PATH%;C:\Windows\system32\DriverStore\FileRepository\igdlh64.inf_amd64_d59561bc9241aaf5 -gpu_info.exe -``` \ No newline at end of file diff --git a/tools/gpu_info/CMakeLists.txt b/tools/gpuinfo/CMakeLists.txt similarity index 64% rename from tools/gpu_info/CMakeLists.txt rename to tools/gpuinfo/CMakeLists.txt index 38a29ea..581b8e7 100644 --- a/tools/gpu_info/CMakeLists.txt +++ b/tools/gpuinfo/CMakeLists.txt @@ -6,20 +6,20 @@ project(PTI_Samples_GPU_Info CXX) SetCompilerFlags() SetBuildType() -add_executable(gpu_info main.cc) -target_include_directories(gpu_info +add_executable(gpuinfo main.cc) +target_include_directories(gpuinfo PRIVATE "${PROJECT_SOURCE_DIR}/../../samples/utils") if(CMAKE_INCLUDE_PATH) - target_include_directories(gpu_info + target_include_directories(gpuinfo PUBLIC "${CMAKE_INCLUDE_PATH}") endif() if(UNIX) - target_link_libraries(gpu_info + target_link_libraries(gpuinfo dl) endif() -GetMDHeaders(gpu_info) +GetMDHeaders(gpuinfo) CheckForMDLibrary() -install(TARGETS gpu_info DESTINATION bin) \ No newline at end of file +install(TARGETS gpuinfo DESTINATION bin) \ No newline at end of file diff --git a/tools/gpuinfo/README.md b/tools/gpuinfo/README.md new file mode 100644 index 0000000..ff6c219 --- /dev/null +++ b/tools/gpuinfo/README.md @@ -0,0 +1,116 @@ +# GPU Information +## Overview +This sample application queries the information about the Intel(R) Processor Graphics device, that includes general GPU properties and hardware metrics, available to collect through [Intel(R) Metrics Discovery Application Programming Interface](https://github.com/intel/metrics-discovery). + +This application could be used as the test to check if Intel(R) Metrics Discovery Application Programming Interface library is available on the system. +The following options are supported: +``` +Usage: ./gpuinfo[.exe] +Options: +-l Print list of available devices +-i Print general device info +-m Print list of available mertrics +-h Print this help message +``` + +Expected output should be the following: +* In case of printing the list of available GPU devices (`-l`): + ``` + Device 0: Intel(R) Iris(R) Plus Graphics 655 (CFL GT3) + ``` +* In case of retrieving device information (`-i`) the following output is expected for all of the devices and subdevices: + ``` + Device 0: + ---- Name: Intel(R) Iris(R) Plus Graphics 655 (CFL GT3) + ---- EuCoresTotalCount: 48 + ---- EuCoresPerSubsliceCount: 8 + ---- EuSubslicesTotalCount: 6 + ---- EuSubslicesPerSliceCount: 3 + ---- EuSlicesTotalCount: 2 + ---- EuThreadsCount: 7 + ---- SliceMask: 3 + ---- SubsliceMask: 63 + ---- SamplersTotalCount: 6 + ---- GpuMinFrequencyMHz: 300 + ---- GpuMaxFrequencyMHz: 1200 + ---- GpuCurrentFrequencyMHz: 300 + ---- PciDeviceId: 16037 + ---- SkuRevisionId: 1 + ---- PlatformIndex: 18 + ---- ApertureSize: 0 + ---- NumberOfRenderOutputUnits: 8 + ---- NumberOfShadingUnits: 28 + ---- OABufferMinSize: 16777216 + ---- OABufferMaxSize: 16777216 + ---- GpuTimestampFrequency: 12000000 + ---- MaxTimestamp: 357913941250 + ``` +* In case of retrieving metrics information (`-m`) the following output is expected for all of the devices and subdevices: + ``` + Device 0: + ---- Metric Group 0: OcclusionQueryStats + ------ Metric Set 0: RenderedPixelsStats (Rendered Pixels Statistics) + -------- Metric 0: OcclusionQueryStats / RenderedPixelsStats / PixelsRendered (Depth passed pixels) [UINT64] + ------ Metric Set 1: RenderedFragmentsStats (Rendered Fragments Statistics) + -------- Metric 0: OcclusionQueryStats / RenderedFragmentsStats / PixelsRendered (Depth passed fragments) [UINT64] + ... + ---- Metric Group 2: PipelineStatistics + ------ Metric Set 0: PipelineStats (Pipeline Statistics for OGL4) + -------- Metric 0: PipelineStatistics / PipelineStats / IAVertices (Input vertices) [UINT64] + -------- Metric 1: PipelineStatistics / PipelineStats / IAPrimitives (Input primitives) [UINT64] + ... + ---- Metric Group 3: OA + ------ Metric Set 0: RenderBasic (Render Metrics Basic Gen9) + -------- Metric 0: OA / RenderBasic / GpuTime (GPU Time Elapsed) [UINT64] + -------- Metric 1: OA / RenderBasic / GpuCoreClocks (GPU Core Clocks) [UINT64] + -------- Metric 2: OA / RenderBasic / AvgGpuCoreFrequencyMHz (AVG GPU Core Frequency) [UINT64] + ... + -------- Info 62: OA / RenderBasic / StreamMarker (Stream marker) + ... + ``` +## Supported OS +- Linux +- Windows (*under development*) + +## Prerequisites +- [CMake](https://cmake.org/) (version 3.12 and above) +- [Git](https://git-scm.com/) (version 1.8 and above) +- [Python](https://www.python.org/) (version 2.7 and above) +- [Intel(R) Metrics Discovery Application Programming Interface](https://github.com/intel/metrics-discovery) + +## Build and Run +### Linux +Run the following commands to build the sample: +```sh +cd /tools/gpuinfo +mkdir build +cd build +cmake -DCMAKE_BUILD_TYPE=Release .. +make +``` +Use this command line to run the utility: +```sh +./gpuinfo +``` +Since Intel(R) Metrics Discovery Application Programming Interface library is loaded at runtime, one may need to set its path explicitly, e.g.: +```sh +LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib ./gpuinfo +``` +### Windows +Use Microsoft* Visual Studio x64 command prompt to run the following commands and build the sample: +```sh +cd \tools\gpuinfo +mkdir build +cd build +cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=Release .. +nmake +``` +Use this command line to run the application: +```sh +gpuinfo.exe +``` +Since Intel(R) Metrics Discovery Application Programming Interface library is loaded at runtime, one may need to set its path explicitly (see the output of cmake), e.g.: +```sh +set PATH=%PATH%;C:\Windows\system32\DriverStore\FileRepository\igdlh64.inf_amd64_d59561bc9241aaf5 +gpuinfo.exe +``` \ No newline at end of file diff --git a/tools/gpu_info/main.cc b/tools/gpuinfo/main.cc similarity index 60% rename from tools/gpu_info/main.cc rename to tools/gpuinfo/main.cc index 6715f86..13afdbe 100644 --- a/tools/gpu_info/main.cc +++ b/tools/gpuinfo/main.cc @@ -17,30 +17,13 @@ enum Mode { GPU_METRICS = 2 }; -struct Options { - Mode mode; - uint32_t device; - uint32_t sub_device; -}; - static void Usage() { - std::cout << "Usage: ./gpu_info[.exe] " << std::endl; + std::cout << "Usage: ./gpuinfo[.exe] " << std::endl; std::cout << "Options:" << std::endl; - std::cout << - "--list [-l] Print list of available devices" << - std::endl; - std::cout << - "--info [-i] Print general device info" << - std::endl; - std::cout << - "--metrics [-m] Print list of available mertrics" << - std::endl; - std::cout << - "--device [-d] Target device id" << - std::endl; - std::cout << - "--subdevice [-s] Target subdevice id" << - std::endl; + std::cout << "-l Print list of available devices" << std::endl; + std::cout << "-i Print general device info" << std::endl; + std::cout << "-m Print list of available mertrics" << std::endl; + std::cout << "-h Print this help message" << std::endl; } static const char* GetDeviceName(const MetricDevice& device) { @@ -54,7 +37,7 @@ static void PrintDeviceList() { if (sub_device_count == 0) { MetricDevice* device = MetricDevice::Create(i, 0); if (device == nullptr) { - std::cout << "[Warning] Unable to open metric device" << std::endl; + std::cout << "[WARNING] Unable to open metric device" << std::endl; return; } std::cout << "Device " << i << ": " << @@ -65,10 +48,10 @@ static void PrintDeviceList() { for (uint32_t j = 0; j < sub_device_count; ++j) { MetricDevice* device = MetricDevice::Create(i, j); if (device == nullptr) { - std::cout << "[Warning] Unable to open metric device" << std::endl; + std::cout << "[WARNING] Unable to open metric device" << std::endl; return; } - std::cout << "\tSubdevice " << j << ": " << + std::cout << "-- Subdevice " << j << ": " << GetDeviceName(*device) << std::endl; delete device; } @@ -93,7 +76,7 @@ static std::string GetResultType(md::TMetricResultType type) { } static void PrintDeviceInfo(const MetricDevice& device) { - std::cout << "\tName: " << + std::cout << "---- Name: " << device->GetParams()->DeviceName << std::endl; PTI_ASSERT(device->GetParams()->GlobalSymbolsCount > 0); for (uint32_t i = 0; i < device->GetParams()->GlobalSymbolsCount; ++i) { @@ -102,7 +85,7 @@ static void PrintDeviceInfo(const MetricDevice& device) { continue; } - std::cout << "\t" << symbol->SymbolName << ": "; + std::cout << "---- " << symbol->SymbolName << ": "; switch (symbol->SymbolTypedValue.ValueType) { case md::VALUE_TYPE_UINT32: { std::cout << symbol->SymbolTypedValue.ValueUInt32; @@ -141,7 +124,7 @@ static void PrintMetricsInfo(const MetricDevice& device) { PTI_ASSERT(group != nullptr); const char* group_name = group->GetParams()->SymbolName; - std::cout << "\tMetric Group " << gid << ": " << group_name << std::endl; + std::cout << "---- Metric Group " << gid << ": " << group_name << std::endl; uint32_t set_count = group->GetParams()->MetricSetsCount; PTI_ASSERT(set_count > 0); @@ -150,7 +133,7 @@ static void PrintMetricsInfo(const MetricDevice& device) { PTI_ASSERT(set != nullptr); const char* set_name = set->GetParams()->SymbolName; - std::cout << "\t\tMetric Set " << sid << ": " << set_name << + std::cout << "------ Metric Set " << sid << ": " << set_name << " (" << set->GetParams()->ShortName << ")" << std::endl; uint32_t metric_count = set->GetParams()->MetricsCount; @@ -158,7 +141,7 @@ static void PrintMetricsInfo(const MetricDevice& device) { for (uint32_t mid = 0; mid < metric_count; ++mid) { md::IMetric_1_0* metric = set->GetMetric(mid); PTI_ASSERT(metric != nullptr); - std::cout << "\t\t\tMetric " << mid << ": " << group_name << " / " << + std::cout << "-------- Metric " << mid << ": " << group_name << " / " << set_name << " / " << metric->GetParams()->SymbolName << " (" << metric->GetParams()->ShortName << ") [" << GetResultType(metric->GetParams()->ResultType) << "]" << std::endl; @@ -168,7 +151,7 @@ static void PrintMetricsInfo(const MetricDevice& device) { for (uint32_t iid = 0; iid < info_count; ++iid) { md::IInformation_1_0* info = set->GetInformation(iid); PTI_ASSERT(info != nullptr); - std::cout << "\t\t\tInfo " << iid + metric_count << ": " << + std::cout << "-------- Info " << iid + metric_count << ": " << group_name << " / " << set_name << " / " << info->GetParams()->SymbolName << " (" << info->GetParams()->ShortName << ")" << std::endl; @@ -177,86 +160,61 @@ static void PrintMetricsInfo(const MetricDevice& device) { } } -static Options ParseArgs(int argc, char* argv[]) { - Options options{GPU_LIST, 0, 0}; - - int i = 1; - while (i < argc) { - std::string option(argv[i]); - if (option == "--list"|| option == "-l") { - options.mode = GPU_LIST; - ++i; - } else if (option == "--info"|| option == "-i") { - options.mode = GPU_INFO; - ++i; - } else if (option == "--metrics" || option == "-m") { - options.mode = GPU_METRICS; - ++i; - } else if (option == "--device" || option == "-d") { - ++i; - if (i < argc) { - std::string value(argv[i]); - if (value.find_first_not_of("0123456789") == std::string::npos) { - options.device = std::stoul(value); - } - ++i; - } - } else if (option == "--subdevice" || option == "-s") { - ++i; - if (i < argc) { - std::string value(argv[i]); - if (value.find_first_not_of("0123456789") == std::string::npos) { - options.sub_device = std::stoul(value); - } - ++i; - } - } else { - ++i; +int main(int argc, char* argv[]) { + Mode mode = GPU_INFO; + + if (argc >= 2) { + if (std::string("-l") == argv[1]) { + mode = GPU_LIST; + } else if (std::string("-m") == argv[1]) { + mode = GPU_METRICS; + } else if (std::string("-h") == argv[1]) { + Usage(); + return 0; } } - return options; -} - -int main(int argc, char* argv[]) { - if (argc < 2) { - Usage(); + uint32_t device_count = MetricDevice::GetDeviceCount(); + if (device_count == 0) { + std::cout << "Unable to find GPU devices" << std::endl; return 0; } - Options options = ParseArgs(argc, argv); - switch (options.mode) { - case GPU_LIST: - PrintDeviceList(); - break; - case GPU_INFO: { - MetricDevice* device = - MetricDevice::Create(options.device, options.sub_device); - if (device == nullptr) { - std::cout << "[Warning] Unable to open metric device" << std::endl; - return 0; + if (mode == GPU_LIST) { + PrintDeviceList(); + } else { + for (uint32_t i = 0; i < device_count; ++i) { + std::cout << "Device " << i << ":" << std::endl; + uint32_t sub_device_count = MetricDevice::GetSubDeviceCount(i); + if (sub_device_count > 0) { + for (uint32_t j = 0; j < sub_device_count; ++j) { + std::cout << "-- Subdevice " << j << ":" << std::endl; + MetricDevice* device = MetricDevice::Create(i, j); + if (device == nullptr) { + std::cout << "[WARNING] Unable to open metric device" << std::endl; + return 0; + } + if (mode == GPU_INFO) { + PrintDeviceInfo(*device); + } else { + PrintMetricsInfo(*device); + } + delete device; } - std::cout << "Device (" << options.device << ", " << - options.sub_device << "):" << std::endl; - PrintDeviceInfo(*device); - delete device; - } - break; - case GPU_METRICS: { - MetricDevice* device = - MetricDevice::Create(options.device, options.sub_device); + } else { + MetricDevice* device = MetricDevice::Create(i, 0); if (device == nullptr) { - std::cout << "[Warning] Unable to open metric device" << std::endl; + std::cout << "[WARNING] Unable to open metric device" << std::endl; return 0; } - std::cout << "Device (" << options.device << ", " << - options.sub_device << "):" << std::endl; - PrintMetricsInfo(*device); + if (mode == GPU_INFO) { + PrintDeviceInfo(*device); + } else { + PrintMetricsInfo(*device); + } delete device; } - break; - default: - break; + } } return 0;