diff --git a/VERSION b/VERSION index 1db0ede..a7f3fc2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.21.0 \ No newline at end of file +0.22.0 \ No newline at end of file diff --git a/samples/build_utils/CMakeLists.txt b/build_utils/CMakeLists.txt similarity index 100% rename from samples/build_utils/CMakeLists.txt rename to build_utils/CMakeLists.txt diff --git a/samples/build_utils/build_utils.py b/build_utils/build_utils.py similarity index 100% rename from samples/build_utils/build_utils.py rename to build_utils/build_utils.py diff --git a/samples/build_utils/get_cl_headers.py b/build_utils/get_cl_headers.py similarity index 100% rename from samples/build_utils/get_cl_headers.py rename to build_utils/get_cl_headers.py diff --git a/samples/build_utils/get_cl_tracing_headers.py b/build_utils/get_cl_tracing_headers.py similarity index 100% rename from samples/build_utils/get_cl_tracing_headers.py rename to build_utils/get_cl_tracing_headers.py diff --git a/samples/build_utils/get_gmm_headers.py b/build_utils/get_gmm_headers.py similarity index 100% rename from samples/build_utils/get_gmm_headers.py rename to build_utils/get_gmm_headers.py diff --git a/samples/build_utils/get_gtpin_headers.py b/build_utils/get_gtpin_headers.py similarity index 100% rename from samples/build_utils/get_gtpin_headers.py rename to build_utils/get_gtpin_headers.py diff --git a/samples/build_utils/get_gtpin_libs.py b/build_utils/get_gtpin_libs.py similarity index 100% rename from samples/build_utils/get_gtpin_libs.py rename to build_utils/get_gtpin_libs.py diff --git a/samples/build_utils/get_iga_headers.py b/build_utils/get_iga_headers.py similarity index 100% rename from samples/build_utils/get_iga_headers.py rename to build_utils/get_iga_headers.py diff --git a/samples/build_utils/get_igc_headers.py b/build_utils/get_igc_headers.py similarity index 100% rename from samples/build_utils/get_igc_headers.py rename to build_utils/get_igc_headers.py diff --git a/samples/build_utils/get_itt.py b/build_utils/get_itt.py similarity index 100% rename from samples/build_utils/get_itt.py rename to build_utils/get_itt.py diff --git a/samples/build_utils/get_md_headers.py b/build_utils/get_md_headers.py similarity index 100% rename from samples/build_utils/get_md_headers.py rename to build_utils/get_md_headers.py diff --git a/chapters/device_activity_tracing/LevelZero.md b/chapters/device_activity_tracing/LevelZero.md index d125ca0..d17f4b9 100644 --- a/chapters/device_activity_tracing/LevelZero.md +++ b/chapters/device_activity_tracing/LevelZero.md @@ -138,4 +138,8 @@ Event pool profiling does not require any additional environment variables to be ## Samples - [Level Zero GEMM](../../samples/ze_gemm) -- [Level Zero Hot Kernels](../../samples/ze_hot_kernels) \ No newline at end of file +- [Level Zero Hot Kernels](../../samples/ze_hot_kernels) + +## Tools +- [Level Zero Tracer](../../tools/ze_tracer) +- [Tracing and Profiling Tool for Data Parallel C++ (DPC++)](../../tools/onetrace) \ No newline at end of file diff --git a/chapters/runtime_api_tracing/LevelZero.md b/chapters/runtime_api_tracing/LevelZero.md index a7e6a74..a5712a5 100644 --- a/chapters/runtime_api_tracing/LevelZero.md +++ b/chapters/runtime_api_tracing/LevelZero.md @@ -97,4 +97,8 @@ ZE_ENABLE_TRACING_LAYER=1 ./ - [Level Zero Hot Functions](../../samples/ze_hot_functions) - [Level Zero Hot Kernels](../../samples/ze_hot_kernels) - [Level Zero Debug Info](../../samples/ze_debug_info) -- [Level Zero Metric Query](../../samples/ze_metric_query) \ No newline at end of file +- [Level Zero Metric Query](../../samples/ze_metric_query) + +## Tools +- [Level Zero Tracer](../../tools/ze_tracer) +- [Tracing and Profiling Tool for Data Parallel C++ (DPC++)](../../tools/onetrace) \ No newline at end of file diff --git a/samples/loader/init.cc b/loader/init.cc similarity index 100% rename from samples/loader/init.cc rename to loader/init.cc diff --git a/samples/loader/loader.cc b/loader/loader.cc similarity index 100% rename from samples/loader/loader.cc rename to loader/loader.cc diff --git a/samples/loader/loader.h b/loader/loader.h similarity index 100% rename from samples/loader/loader.h rename to loader/loader.h diff --git a/samples/loader/tool.h b/loader/tool.h similarity index 100% rename from samples/loader/tool.h rename to loader/tool.h diff --git a/samples/cl_debug_info/CMakeLists.txt b/samples/cl_debug_info/CMakeLists.txt index a647e6e..baf4d9f 100644 --- a/samples/cl_debug_info/CMakeLists.txt +++ b/samples/cl_debug_info/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -8,9 +8,9 @@ SetBuildType() # Tool Library -add_library(clt_debug_info SHARED "${PROJECT_SOURCE_DIR}/../loader/init.cc" tool.cc) +add_library(clt_debug_info SHARED "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc) target_include_directories(clt_debug_info - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(clt_debug_info PUBLIC "${CMAKE_INCLUDE_PATH}") @@ -30,9 +30,9 @@ GetGmmHeaders(clt_debug_info) # Loader set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=clt_debug_info") -add_executable(cl_debug_info "${PROJECT_SOURCE_DIR}/../loader/loader.cc") +add_executable(cl_debug_info "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") target_include_directories(cl_debug_info - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(UNIX) target_link_libraries(cl_debug_info dl) diff --git a/samples/cl_gemm/CMakeLists.txt b/samples/cl_gemm/CMakeLists.txt index f437369..98892ff 100644 --- a/samples/cl_gemm/CMakeLists.txt +++ b/samples/cl_gemm/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -8,7 +8,7 @@ SetBuildType() add_executable(cl_gemm main.cc) target_include_directories(cl_gemm - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(cl_gemm PUBLIC "${CMAKE_INCLUDE_PATH}") diff --git a/samples/cl_gemm_inst/CMakeLists.txt b/samples/cl_gemm_inst/CMakeLists.txt index 118c2ef..601ae10 100644 --- a/samples/cl_gemm_inst/CMakeLists.txt +++ b/samples/cl_gemm_inst/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -8,7 +8,7 @@ SetBuildType() add_executable(cl_gemm_inst main.cc) target_include_directories(cl_gemm_inst - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(cl_gemm_inst PUBLIC "${CMAKE_INCLUDE_PATH}") diff --git a/samples/cl_gemm_itt/CMakeLists.txt b/samples/cl_gemm_itt/CMakeLists.txt index f028e40..e0eec69 100644 --- a/samples/cl_gemm_itt/CMakeLists.txt +++ b/samples/cl_gemm_itt/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -8,7 +8,7 @@ SetBuildType() add_executable(cl_gemm_itt main.cc ittnotify.cc) target_include_directories(cl_gemm_itt - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(cl_gemm_itt PUBLIC "${CMAKE_INCLUDE_PATH}") diff --git a/samples/cl_gpu_metrics/CMakeLists.txt b/samples/cl_gpu_metrics/CMakeLists.txt index 3171954..12ee60b 100644 --- a/samples/cl_gpu_metrics/CMakeLists.txt +++ b/samples/cl_gpu_metrics/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -9,10 +9,10 @@ SetBuildType() # Tool Library add_library(clt_gpu_metrics SHARED - "${PROJECT_SOURCE_DIR}/../loader/init.cc" + "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc) target_include_directories(clt_gpu_metrics - PRIVATE "${PROJECT_SOURCE_DIR}/../utils" + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils" PRIVATE "${PROJECT_SOURCE_DIR}/../cl_hot_kernels") if(CMAKE_INCLUDE_PATH) target_include_directories(clt_gpu_metrics @@ -34,9 +34,9 @@ CheckForMDLibrary() # Loader set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=clt_gpu_metrics") -add_executable(cl_gpu_metrics "${PROJECT_SOURCE_DIR}/../loader/loader.cc") +add_executable(cl_gpu_metrics "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") target_include_directories(cl_gpu_metrics - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(UNIX) target_link_libraries(cl_gpu_metrics dl) diff --git a/samples/cl_hot_functions/CMakeLists.txt b/samples/cl_hot_functions/CMakeLists.txt index a468644..ba97739 100644 --- a/samples/cl_hot_functions/CMakeLists.txt +++ b/samples/cl_hot_functions/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -9,10 +9,10 @@ SetBuildType() # Tool Library add_library(clt_hot_functions SHARED - "${PROJECT_SOURCE_DIR}/../loader/init.cc" + "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc) target_include_directories(clt_hot_functions - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(clt_hot_functions PUBLIC "${CMAKE_INCLUDE_PATH}") @@ -26,9 +26,9 @@ GetOpenCLTracingHeaders(clt_hot_functions) # Loader set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=clt_hot_functions") -add_executable(cl_hot_functions "${PROJECT_SOURCE_DIR}/../loader/loader.cc") +add_executable(cl_hot_functions "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") target_include_directories(cl_hot_functions - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(UNIX) target_link_libraries(cl_hot_functions dl) diff --git a/samples/cl_hot_functions/README.md b/samples/cl_hot_functions/README.md index 5bc1267..a473b9f 100644 --- a/samples/cl_hot_functions/README.md +++ b/samples/cl_hot_functions/README.md @@ -1,30 +1,43 @@ # OpenCL(TM) Hot Functions ## Overview -This is a simple LD_PRELOAD based tool that allows to collect all called OpenCL(TM) API functions within an application along with their total execution time and call count for GPU device. +This is a simple LD_PRELOAD based tool that allows to collect all called OpenCL(TM) API functions within an application along with their total execution time and call count. As a result, table like the following will be printed. ``` - Function, Calls, Time (ns), Average (ns) - clBuildProgram, 1, 183549198, 183549198 - clCreateBuffer, 12, 108285, 9023 -clCreateCommandQueueWithProperties, 1, 1265, 1265 - clCreateContext, 1, 9322, 9322 - clCreateKernel, 1, 3428, 3428 - clCreateProgramWithSource, 1, 3219, 3219 - clEnqueueNDRangeKernel, 4, 2237845, 559461 - clEnqueueReadBuffer, 4, 2358133, 589533 - clEnqueueWriteBuffer, 8, 5719781, 714972 - clFinish, 4, 174064236, 43516059 - clGetDeviceIDs, 2, 362, 181 - clGetDeviceInfo, 2, 354, 177 - clGetEventProfilingInfo, 8, 14198, 1774 - clGetKernelInfo, 4, 2411, 602 - clReleaseCommandQueue, 1, 1046, 1046 - clReleaseContext, 1, 173, 173 - clReleaseKernel, 1, 2741, 2741 - clReleaseMemObject, 12, 110922, 9243 - clReleaseProgram, 1, 11561, 11561 - clSetKernelArg, 16, 75282, 4705 +=== API Timing Results: === + +Total Execution Time (ns): 363687486 +Total API Time for CPU backend (ns): 524 +Total API Time for GPU backend (ns): 355355363 + +== CPU Backend: == + + Function, Calls, Time (ns), Time (%), Average (ns), Min (ns), Max (ns) +clGetDeviceIDs, 1, 524, 100.00, 524, 524, 524 + +== GPU Backend: == + + Function, Calls, Time (ns), Time (%), Average (ns), Min (ns), Max (ns) + clBuildProgram, 1, 173888026, 48.93, 173888026, 173888026, 173888026 + clFinish, 4, 172908147, 48.66, 43227036, 42711785, 44318785 + clEnqueueWriteBuffer, 8, 4636256, 1.30, 579532, 207825, 1864890 + clEnqueueReadBuffer, 4, 2051244, 0.58, 512811, 498662, 542971 + clEnqueueNDRangeKernel, 4, 1623139, 0.46, 405784, 236120, 609050 + clReleaseMemObject, 12, 95182, 0.03, 7931, 3525, 16436 + clCreateBuffer, 12, 81056, 0.02, 6754, 2511, 16990 + clSetKernelArg, 16, 24515, 0.01, 1532, 141, 7038 + clGetEventProfilingInfo, 8, 13139, 0.00, 1642, 103, 3288 + clCreateContext, 1, 12680, 0.00, 12680, 12680, 12680 + clReleaseProgram, 1, 9503, 0.00, 9503, 9503, 9503 + clCreateProgramWithSource, 1, 3880, 0.00, 3880, 3880, 3880 + clCreateKernel, 1, 2941, 0.00, 2941, 2941, 2941 + clReleaseKernel, 1, 1679, 0.00, 1679, 1679, 1679 + clGetKernelInfo, 4, 1617, 0.00, 404, 190, 552 +clCreateCommandQueueWithProperties, 1, 1388, 0.00, 1388, 1388, 1388 + clGetDeviceIDs, 2, 311, 0.00, 155, 138, 173 + clReleaseCommandQueue, 1, 270, 0.00, 270, 270, 270 + clGetDeviceInfo, 2, 227, 0.00, 113, 103, 124 + clReleaseContext, 1, 163, 0.00, 163, 163, 163 ``` ## Supported OS - Linux @@ -35,7 +48,8 @@ clCreateCommandQueueWithProperties, 1, 1265, - [Git](https://git-scm.com/) (version 1.8 and above) - [Python](https://www.python.org/) (version 2.7 and above) - [OpenCL(TM) ICD Loader](https://github.com/KhronosGroup/OpenCL-ICD-Loader) -- [Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver](https://github.com/intel/compute-runtime) +- [Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver](https://github.com/intel/compute-runtime) to run on GPU +- [Intel(R) Xeon(R) Processor / Intel(R) Core(TM) Processor (CPU) Runtimes](https://software.intel.com/en-us/articles/opencl-drivers#cpu-section) to run on CPU ## Build and Run ### Linux @@ -51,9 +65,10 @@ Use this command line to run the tool: ```sh ./cl_hot_functions ``` -One may use [cl_gemm](../cl_gemm) as target application: +One may use [cl_gemm](../cl_gemm) or [dpc_gemm](../dpc_gemm) as target application: ```sh ./cl_hot_functions ../../cl_gemm/build/cl_gemm +./cl_hot_functions ../../dpc_gemm/build/dpc_gemm cpu ``` ### Windows Use Microsoft* Visual Studio x64 command prompt to run the following commands and build the sample: @@ -68,7 +83,8 @@ Use this command line to run the tool: ```sh cl_hot_functions.exe ``` -One may use [cl_gemm](../cl_gemm) as target application: +One may use [cl_gemm](../cl_gemm) or [dpc_gemm](../dpc_gemm) as target application: ```sh cl_hot_functions.exe ..\..\cl_gemm\build\cl_gemm.exe +cl_hot_functions.exe ..\..\dpc_gemm\build\dpc_gemm.exe cpu ``` \ No newline at end of file diff --git a/samples/cl_hot_functions/cl_api_collector.h b/samples/cl_hot_functions/cl_api_collector.h new file mode 100644 index 0000000..18ac7f8 --- /dev/null +++ b/samples/cl_hot_functions/cl_api_collector.h @@ -0,0 +1,205 @@ +//============================================================== +// Copyright (C) Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= + +#ifndef PTI_SAMPLES_CL_HOT_FUNCTIONS_CL_API_COLLECTOR_H_ +#define PTI_SAMPLES_CL_HOT_FUNCTIONS_CL_API_COLLECTOR_H_ + +#include +#include +#include +#include +#include +#include + +#include "cl_api_tracer.h" +#include "cl_utils.h" + +struct ClFunction { + uint64_t total_time; + uint64_t min_time; + uint64_t max_time; + uint64_t call_count; + + bool operator>(const ClFunction& r) const { + if (total_time != r.total_time) { + return total_time > r.total_time; + } + return call_count > r.call_count; + } + + bool operator!=(const ClFunction& r) const { + if (total_time == r.total_time) { + return call_count != r.call_count; + } + return true; + } +}; + +using ClFunctionInfoMap = std::map; + +class ClApiCollector { + public: // User Interface + static ClApiCollector* Create(cl_device_id device) { + PTI_ASSERT(device != nullptr); + + ClApiCollector* collector = new ClApiCollector(); + PTI_ASSERT(collector != nullptr); + + ClApiTracer* tracer = new ClApiTracer(device, Callback, collector); + if (tracer == nullptr || !tracer->IsValid()) { + std::cerr << "[WARNING] Unable to create OpenCL tracer " << + "for target device" << std::endl; + if (tracer != nullptr) { + delete tracer; + delete collector; + } + return nullptr; + } + + collector->EnableTracing(tracer); + return collector; + } + + ~ClApiCollector() { + if (tracer_ != nullptr) { + delete tracer_; + } + } + + void DisableTracing() { + PTI_ASSERT(tracer_ != nullptr); + bool disabled = tracer_->Disable(); + PTI_ASSERT(disabled); + } + + const ClFunctionInfoMap& GetFunctionInfoMap() const { + return function_info_map_; + } + + ClApiCollector(const ClApiCollector& copy) = delete; + ClApiCollector& operator=(const ClApiCollector& copy) = delete; + + static void PrintFunctionsTable(const ClFunctionInfoMap& function_info_map) { + std::set< std::pair, + utils::Comparator > sorted_list( + function_info_map.begin(), function_info_map.end()); + + uint64_t total_duration = 0; + size_t max_name_length = kFunctionLength; + for (auto& value : sorted_list) { + total_duration += value.second.total_time; + if (value.first.size() > max_name_length) { + max_name_length = value.first.size(); + } + } + + if (total_duration == 0) { + return; + } + + std::cerr << std::setw(max_name_length) << "Function" << "," << + std::setw(kCallsLength) << "Calls" << "," << + std::setw(kTimeLength) << "Time (ns)" << "," << + std::setw(kPercentLength) << "Time (%)" << "," << + std::setw(kTimeLength) << "Average (ns)" << "," << + std::setw(kTimeLength) << "Min (ns)" << "," << + std::setw(kTimeLength) << "Max (ns)" << std::endl; + + for (auto& value : sorted_list) { + const std::string& function = value.first; + uint64_t call_count = value.second.call_count; + uint64_t duration = value.second.total_time; + uint64_t avg_duration = duration / call_count; + uint64_t min_duration = value.second.min_time; + uint64_t max_duration = value.second.max_time; + float percent_duration = 100.0f * duration / total_duration; + std::cerr << std::setw(max_name_length) << function << "," << + std::setw(kCallsLength) << call_count << "," << + std::setw(kTimeLength) << duration << "," << + std::setw(kPercentLength) << std::setprecision(2) << + std::fixed << percent_duration << "," << + std::setw(kTimeLength) << avg_duration << "," << + std::setw(kTimeLength) << min_duration << "," << + std::setw(kTimeLength) << max_duration << std::endl; + } + } + + private: // Implementation Details + ClApiCollector() {} + + void EnableTracing(ClApiTracer* tracer) { + PTI_ASSERT(tracer != nullptr); + tracer_ = tracer; + + for (int id = 0; id < CL_FUNCTION_COUNT; ++id) { + bool set = tracer_->SetTracingFunction(static_cast(id)); + PTI_ASSERT(set); + } + + bool enabled = tracer_->Enable(); + PTI_ASSERT(enabled); + } + + uint64_t GetTimestamp() const { + std::chrono::duration timestamp = + std::chrono::steady_clock::now() - base_time_; + return timestamp.count(); + } + + void AddFunctionTime(const std::string& name, uint64_t time) { + const std::lock_guard lock(lock_); + if (function_info_map_.count(name) == 0) { + function_info_map_[name] = {time, time, time, 1}; + } else { + ClFunction& function = function_info_map_[name]; + function.total_time += time; + if (time < function.min_time) { + function.min_time = time; + } + if (time > function.max_time) { + function.max_time = time; + } + ++function.call_count; + } + } + + private: // Callbacks + static void Callback( + cl_function_id function, + cl_callback_data* callback_data, + void* user_data) { + ClApiCollector* collector = reinterpret_cast(user_data); + PTI_ASSERT(collector != nullptr); + PTI_ASSERT(callback_data != nullptr); + PTI_ASSERT(callback_data->correlationData != nullptr); + + if (callback_data->site == CL_CALLBACK_SITE_ENTER) { + uint64_t& start_time = *reinterpret_cast( + callback_data->correlationData); + start_time = collector->GetTimestamp(); + } else { + uint64_t end_time = collector->GetTimestamp(); + uint64_t& start_time = *reinterpret_cast( + callback_data->correlationData); + collector->AddFunctionTime( + callback_data->functionName, end_time - start_time); + } + } + + private: // Data + ClApiTracer* tracer_ = nullptr; + std::chrono::time_point base_time_; + + std::mutex lock_; + ClFunctionInfoMap function_info_map_; + + static const uint32_t kFunctionLength = 10; + static const uint32_t kCallsLength = 12; + static const uint32_t kTimeLength = 20; + static const uint32_t kPercentLength = 10; +}; + +#endif // PTI_SAMPLES_CL_HOT_FUNCTIONS_CL_API_COLLECTOR_H_ \ No newline at end of file diff --git a/samples/cl_hot_functions/tool.cc b/samples/cl_hot_functions/tool.cc index 036242d..9bfd749 100644 --- a/samples/cl_hot_functions/tool.cc +++ b/samples/cl_hot_functions/tool.cc @@ -13,25 +13,11 @@ #include #include -#include +#include "cl_api_collector.h" -#include "cl_utils.h" -#include "pti_assert.h" - -// Pointers to tracing functions -static decltype(clCreateTracingHandleINTEL)* clCreateTracingHandle = nullptr; -static decltype(clSetTracingPointINTEL)* clSetTracingPoint = nullptr; -static decltype(clDestroyTracingHandleINTEL)* clDestroyTracingHandle = nullptr; -static decltype(clEnableTracingINTEL)* clEnableTracing = nullptr; -static decltype(clDisableTracingINTEL)* clDisableTracing = nullptr; - -// Tracing handle -static cl_tracing_handle tracer = nullptr; - -// Function maps & mutex -static std::map function_time_map; -static std::map function_count_map; -std::mutex lock; +static ClApiCollector* cpu_collector = nullptr; +static ClApiCollector* gpu_collector = nullptr; +static std::chrono::steady_clock::time_point start; // External Tool Interface //////////////////////////////////////////////////// @@ -61,172 +47,110 @@ void SetToolEnv() {} // Internal Tool Functionality //////////////////////////////////////////////// -static bool LoadTracingFunctions(cl_device_id device) { - PTI_ASSERT(device != nullptr); - - cl_int status = CL_SUCCESS; - - cl_platform_id platform = nullptr; - status = clGetDeviceInfo( - device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, nullptr); - PTI_ASSERT(status == CL_SUCCESS); - - clCreateTracingHandle = - reinterpret_cast( - clGetExtensionFunctionAddressForPlatform( - platform, "clCreateTracingHandleINTEL")); - clSetTracingPoint = - reinterpret_cast( - clGetExtensionFunctionAddressForPlatform( - platform, "clSetTracingPointINTEL")); - clDestroyTracingHandle = - reinterpret_cast( - clGetExtensionFunctionAddressForPlatform( - platform, "clDestroyTracingHandleINTEL")); - clEnableTracing = - reinterpret_cast( - clGetExtensionFunctionAddressForPlatform( - platform, "clEnableTracingINTEL")); - clDisableTracing = - reinterpret_cast( - clGetExtensionFunctionAddressForPlatform( - platform, "clDisableTracingINTEL")); - - if (clCreateTracingHandle == nullptr || - clSetTracingPoint == nullptr || - clDestroyTracingHandle == nullptr || - clEnableTracing == nullptr || - clDisableTracing == nullptr) { - return false; - } - - return true; -} +static uint64_t CalculateTotalTime(ClApiCollector* collector) { + PTI_ASSERT(collector != nullptr); + uint64_t total_duration = 0; -static void Callback( - cl_function_id function, - cl_callback_data* callback_data, - void* user_data) { - PTI_ASSERT(callback_data != nullptr); - PTI_ASSERT(callback_data->correlationData != nullptr); - - // Get current time point - std::chrono::duration time = - std::chrono::steady_clock::now().time_since_epoch(); - - if (callback_data->site == CL_CALLBACK_SITE_ENTER) { // Before the function - uint64_t& start_time = *reinterpret_cast( - callback_data->correlationData); - start_time = time.count(); - } else { // After the function - uint64_t end_time = time.count(); - uint64_t& start_time = *reinterpret_cast( - callback_data->correlationData); - - { - const std::lock_guard guard(lock); - - if (function_time_map.count(callback_data->functionName) == 0) { - function_time_map[callback_data->functionName] = - end_time - start_time; - } else { - function_time_map[callback_data->functionName] += - end_time - start_time; - } - - if (function_count_map.count(callback_data->functionName) == 0) { - function_count_map[callback_data->functionName] = 1; - } else { - function_count_map[callback_data->functionName] += 1; - } + const ClFunctionInfoMap& function_info_map = collector->GetFunctionInfoMap(); + if (function_info_map.size() != 0) { + for (auto& value : function_info_map) { + total_duration += value.second.total_time; } } + + return total_duration; +} + +static void PrintDeviceTable( + ClApiCollector* collector, const char* device_type) { + PTI_ASSERT(collector != nullptr); + PTI_ASSERT(device_type != nullptr); + + uint64_t total_duration = CalculateTotalTime(collector); + if (total_duration > 0) { + std::cerr << std::endl; + std::cerr << "== " << device_type << " Backend: ==" << std::endl; + std::cerr << std::endl; + + const ClFunctionInfoMap& function_info_map = collector->GetFunctionInfoMap(); + PTI_ASSERT(function_info_map.size() > 0); + ClApiCollector::PrintFunctionsTable(function_info_map); + } } static void PrintResults() { - if (function_time_map.empty()) { + if (cpu_collector == nullptr && gpu_collector == nullptr) { return; } - size_t function_length = 0; - for (auto& item : function_time_map) { - auto& name = item.first; - if (name.size() > function_length) { - function_length = name.size(); - } - } - PTI_ASSERT(function_length > 0); + std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); + std::chrono::duration time = end - start; std::cerr << std::endl; - std::cerr << std::setw(function_length) << "Function" << "," << - std::setw(12) << "Calls" << "," << - std::setw(20) << "Time (ns)" << "," << - std::setw(20) << "Average (ns)" << std::endl; - - for (auto& item : function_time_map) { - auto& name = item.first; - uint64_t time = item.second; - PTI_ASSERT(function_count_map.count(name) == 1); - uint64_t count = function_count_map[name]; - std::cerr << std::setw(function_length) << name << "," << - std::setw(12) << count << "," << - std::setw(20) << time << "," << - std::setw(20) << time / count << std::endl; + std::cerr << "=== API Timing Results: ===" << std::endl; + std::cerr << std::endl; + std::cerr << "Total Execution Time (ns): " << time.count() << std::endl; + + if (cpu_collector != nullptr) { + std::cerr << "Total API Time for CPU backend (ns): " << + CalculateTotalTime(cpu_collector) << std::endl; + } + if (gpu_collector != nullptr) { + std::cerr << "Total API Time for GPU backend (ns): " << + CalculateTotalTime(gpu_collector) << std::endl; + } + + if (cpu_collector != nullptr) { + PrintDeviceTable(cpu_collector, "CPU"); + } + if (gpu_collector != nullptr) { + PrintDeviceTable(gpu_collector, "GPU"); } + std::cerr << std::endl; } // Internal Tool Interface //////////////////////////////////////////////////// void EnableProfiling() { - cl_int status = CL_SUCCESS; - - // Get GPU device - cl_device_id device = utils::cl::GetIntelDevice(CL_DEVICE_TYPE_GPU); - if (device == nullptr) { - std::cerr << - "[WARNING] Unable to find GPU device for tracing" << std::endl; + cl_device_id cpu_device = utils::cl::GetIntelDevice(CL_DEVICE_TYPE_CPU); + cl_device_id gpu_device = utils::cl::GetIntelDevice(CL_DEVICE_TYPE_GPU); + if (cpu_device == nullptr && gpu_device == nullptr) { + std::cerr << "[WARNING] Unable to find device for tracing" << std::endl; return; } - // Get pointers for tracing functions - bool loaded = LoadTracingFunctions(device); - if (!loaded) { - std::cerr << - "[WARNING] Unable to load pointers for tracing functions" << std::endl; - return; + if (gpu_device == nullptr) { + std::cerr << "[WARNING] Unable to find GPU device for tracing" << + std::endl; + } + if (cpu_device == nullptr) { + std::cerr << "[WARNING] Unable to find CPU device for tracing" << + std::endl; } - // Create tracing handle - status = clCreateTracingHandle(device, Callback, nullptr, &tracer); - PTI_ASSERT(status == CL_SUCCESS); - - // Switch on tracing for all of the functions - for (int fid = 0; fid < CL_FUNCTION_COUNT; ++fid) { - status = clSetTracingPoint( - tracer, static_cast(fid), CL_TRUE); - PTI_ASSERT(status == CL_SUCCESS); + if (cpu_device != nullptr) { + cpu_collector = ClApiCollector::Create(cpu_device); + } + if (gpu_device != nullptr) { + gpu_collector = ClApiCollector::Create(gpu_device); } - // Enable tracing - status = clEnableTracing(tracer); - PTI_ASSERT(status == CL_SUCCESS); + start = std::chrono::steady_clock::now(); } void DisableProfiling() { - if (tracer == nullptr) { - return; + if (cpu_collector != nullptr) { + cpu_collector->DisableTracing(); + } + if (gpu_collector != nullptr) { + gpu_collector->DisableTracing(); } - - cl_int status = CL_SUCCESS; - - // Disable tracing - status = clDisableTracing(tracer); - PTI_ASSERT(status == CL_SUCCESS); - - // Destroy tracing handle - status = clDestroyTracingHandle(tracer); - PTI_ASSERT(status == CL_SUCCESS); - PrintResults(); + if (cpu_collector != nullptr) { + delete cpu_collector; + } + if (gpu_collector != nullptr) { + delete gpu_collector; + } } \ No newline at end of file diff --git a/samples/cl_hot_kernels/CMakeLists.txt b/samples/cl_hot_kernels/CMakeLists.txt index 82a37fd..579fe05 100644 --- a/samples/cl_hot_kernels/CMakeLists.txt +++ b/samples/cl_hot_kernels/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -9,10 +9,10 @@ SetBuildType() # Tool Library add_library(clt_hot_kernels SHARED - "${PROJECT_SOURCE_DIR}/../loader/init.cc" + "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc) target_include_directories(clt_hot_kernels - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(clt_hot_kernels PUBLIC "${CMAKE_INCLUDE_PATH}") @@ -26,9 +26,9 @@ GetOpenCLTracingHeaders(clt_hot_kernels) # Loader set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=clt_hot_kernels") -add_executable(cl_hot_kernels "${PROJECT_SOURCE_DIR}/../loader/loader.cc") +add_executable(cl_hot_kernels "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") target_include_directories(cl_hot_kernels - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(UNIX) target_link_libraries(cl_hot_kernels dl) diff --git a/samples/cl_hot_kernels/README.md b/samples/cl_hot_kernels/README.md index 4f19c98..24c49b8 100644 --- a/samples/cl_hot_kernels/README.md +++ b/samples/cl_hot_kernels/README.md @@ -1,6 +1,6 @@ # OpenCL(TM) Hot Functions ## Overview -This sample is a simple LD_PRELOAD based tool that allows to collect OpenCL(TM) kernels within an application along with their total execution time and call count. +This is a simple LD_PRELOAD based tool that allows to collect OpenCL(TM) kernels within an application along with their total execution time and call count. As a result, table like the following will be printed. ``` diff --git a/samples/cl_hot_kernels/cl_kernel_collector.h b/samples/cl_hot_kernels/cl_kernel_collector.h index 3dc24fb..9e8660e 100644 --- a/samples/cl_hot_kernels/cl_kernel_collector.h +++ b/samples/cl_hot_kernels/cl_kernel_collector.h @@ -172,6 +172,9 @@ class ClKernelCollector { void AddKernelInfo(std::string name, uint64_t time, size_t simd_width) { PTI_ASSERT(!name.empty()); + PTI_ASSERT(time > 0); + + const std::lock_guard lock(lock_); if (kernel_info_map_.count(name) == 0) { kernel_info_map_[name] = {time, time, time, 1, simd_width}; } else { @@ -191,6 +194,7 @@ class ClKernelCollector { void AddKernelInterval(std::string name, uint64_t start, uint64_t end) { PTI_ASSERT(!name.empty()); PTI_ASSERT(start < end); + const std::lock_guard lock(lock_); kernel_interval_list_.push_back({name, start, end}); } diff --git a/samples/dpc_gemm/CMakeLists.txt b/samples/dpc_gemm/CMakeLists.txt index b5c1f83..4309f4a 100644 --- a/samples/dpc_gemm/CMakeLists.txt +++ b/samples/dpc_gemm/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -16,7 +16,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -gline-tables-only") add_executable(dpc_gemm main.cc) target_include_directories(dpc_gemm - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(dpc_gemm PUBLIC "${CMAKE_INCLUDE_PATH}") diff --git a/samples/dpc_info/CMakeLists.txt b/samples/dpc_info/CMakeLists.txt index dea7650..df112ab 100644 --- a/samples/dpc_info/CMakeLists.txt +++ b/samples/dpc_info/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) diff --git a/samples/gpu_inst_count/CMakeLists.txt b/samples/gpu_inst_count/CMakeLists.txt index d779896..1337c00 100644 --- a/samples/gpu_inst_count/CMakeLists.txt +++ b/samples/gpu_inst_count/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -8,9 +8,9 @@ SetBuildType() # Tool Library -add_library(gput_inst_count SHARED "${PROJECT_SOURCE_DIR}/../loader/init.cc" tool.cc) +add_library(gput_inst_count SHARED "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc) target_include_directories(gput_inst_count - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(gput_inst_count PUBLIC "${CMAKE_INCLUDE_PATH}") @@ -25,9 +25,9 @@ GetGTPinHeaders(gput_inst_count) # Loader set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=gput_inst_count") -add_executable(gpu_inst_count "${PROJECT_SOURCE_DIR}/../loader/loader.cc") +add_executable(gpu_inst_count "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") target_include_directories(gpu_inst_count - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(UNIX) target_link_libraries(gpu_inst_count dl) diff --git a/samples/gpu_perfmon_read/CMakeLists.txt b/samples/gpu_perfmon_read/CMakeLists.txt index e210db7..8c6b437 100644 --- a/samples/gpu_perfmon_read/CMakeLists.txt +++ b/samples/gpu_perfmon_read/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -8,9 +8,9 @@ SetBuildType() # Tool Library -add_library(gput_perfmon_read SHARED "${PROJECT_SOURCE_DIR}/../loader/init.cc" tool.cc) +add_library(gput_perfmon_read SHARED "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc) target_include_directories(gput_perfmon_read - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(gput_perfmon_read PUBLIC "${CMAKE_INCLUDE_PATH}") @@ -25,9 +25,9 @@ GetGTPinHeaders(gput_perfmon_read) # Loader set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=gput_perfmon_read") -add_executable(gpu_perfmon_read "${PROJECT_SOURCE_DIR}/../loader/loader.cc") +add_executable(gpu_perfmon_read "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") target_include_directories(gpu_perfmon_read - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(UNIX) target_link_libraries(gpu_perfmon_read dl) diff --git a/samples/gpu_perfmon_set/CMakeLists.txt b/samples/gpu_perfmon_set/CMakeLists.txt index e842a40..2242fed 100644 --- a/samples/gpu_perfmon_set/CMakeLists.txt +++ b/samples/gpu_perfmon_set/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -12,7 +12,7 @@ endif() add_executable(gpu_perfmon_set main.cc) target_include_directories(gpu_perfmon_set - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(gpu_perfmon_set PUBLIC "${CMAKE_INCLUDE_PATH}") diff --git a/samples/omp_gemm/CMakeLists.txt b/samples/omp_gemm/CMakeLists.txt index 8c8ad69..e09fd88 100644 --- a/samples/omp_gemm/CMakeLists.txt +++ b/samples/omp_gemm/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -20,7 +20,7 @@ endif() add_executable(omp_gemm main.cc) target_include_directories(omp_gemm - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(omp_gemm PUBLIC "${CMAKE_INCLUDE_PATH}") diff --git a/samples/omp_hot_regions/CMakeLists.txt b/samples/omp_hot_regions/CMakeLists.txt index b87f4c3..104ed63 100644 --- a/samples/omp_hot_regions/CMakeLists.txt +++ b/samples/omp_hot_regions/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -16,7 +16,7 @@ CheckForOMPTHeaders() add_library(omp_hot_regions SHARED tool.cc) target_include_directories(omp_hot_regions - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(omp_hot_regions PUBLIC "${CMAKE_INCLUDE_PATH}") diff --git a/samples/ze_debug_info/CMakeLists.txt b/samples/ze_debug_info/CMakeLists.txt index e7fdda7..466dabd 100644 --- a/samples/ze_debug_info/CMakeLists.txt +++ b/samples/ze_debug_info/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -8,9 +8,9 @@ SetBuildType() # Tool Library -add_library(zet_debug_info SHARED "${PROJECT_SOURCE_DIR}/../loader/init.cc" tool.cc) +add_library(zet_debug_info SHARED "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc) target_include_directories(zet_debug_info - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(zet_debug_info PUBLIC "${CMAKE_INCLUDE_PATH}") @@ -28,9 +28,9 @@ GetGmmHeaders(zet_debug_info) # Loader set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=zet_debug_info") -add_executable(ze_debug_info "${PROJECT_SOURCE_DIR}/../loader/loader.cc") +add_executable(ze_debug_info "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") target_include_directories(ze_debug_info - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(UNIX) target_link_libraries(ze_debug_info dl) diff --git a/samples/ze_gemm/CMakeLists.txt b/samples/ze_gemm/CMakeLists.txt index 8319577..b94a119 100644 --- a/samples/ze_gemm/CMakeLists.txt +++ b/samples/ze_gemm/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -8,7 +8,7 @@ SetBuildType() add_executable(ze_gemm main.cc) target_include_directories(ze_gemm - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(ze_gemm PUBLIC "${CMAKE_INCLUDE_PATH}") diff --git a/samples/ze_hot_functions/CMakeLists.txt b/samples/ze_hot_functions/CMakeLists.txt index 426aee6..cdb911f 100644 --- a/samples/ze_hot_functions/CMakeLists.txt +++ b/samples/ze_hot_functions/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -9,10 +9,10 @@ SetBuildType() # Tool Library add_library(zet_hot_functions SHARED - "${PROJECT_SOURCE_DIR}/../loader/init.cc" + "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc) target_include_directories(zet_hot_functions - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(zet_hot_functions PUBLIC "${CMAKE_INCLUDE_PATH}") @@ -26,9 +26,9 @@ FindL0HeadersPath(zet_hot_functions "${PROJECT_SOURCE_DIR}/gen_tracing_callbacks # Loader set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=zet_hot_functions") -add_executable(ze_hot_functions "${PROJECT_SOURCE_DIR}/../loader/loader.cc") +add_executable(ze_hot_functions "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") target_include_directories(ze_hot_functions - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(UNIX) target_link_libraries(ze_hot_functions dl) diff --git a/samples/ze_hot_kernels/CMakeLists.txt b/samples/ze_hot_kernels/CMakeLists.txt index 3ddb304..a4ae2e1 100644 --- a/samples/ze_hot_kernels/CMakeLists.txt +++ b/samples/ze_hot_kernels/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -9,10 +9,10 @@ SetBuildType() # Tool Library add_library(zet_hot_kernels SHARED - "${PROJECT_SOURCE_DIR}/../loader/init.cc" + "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc) target_include_directories(zet_hot_kernels - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(zet_hot_kernels PUBLIC "${CMAKE_INCLUDE_PATH}") @@ -29,9 +29,9 @@ endif() # Loader set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=zet_hot_kernels") -add_executable(ze_hot_kernels "${PROJECT_SOURCE_DIR}/../loader/loader.cc") +add_executable(ze_hot_kernels "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") target_include_directories(ze_hot_kernels - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(UNIX) target_link_libraries(ze_hot_kernels dl) diff --git a/samples/ze_info/CMakeLists.txt b/samples/ze_info/CMakeLists.txt index fd8f9af..ddf61af 100644 --- a/samples/ze_info/CMakeLists.txt +++ b/samples/ze_info/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -8,7 +8,7 @@ SetBuildType() add_executable(ze_info main.cc) target_include_directories(ze_info - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(ze_info PUBLIC "${CMAKE_INCLUDE_PATH}") diff --git a/samples/ze_metric_info/CMakeLists.txt b/samples/ze_metric_info/CMakeLists.txt index ed6a0c1..be4bf11 100644 --- a/samples/ze_metric_info/CMakeLists.txt +++ b/samples/ze_metric_info/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -8,7 +8,7 @@ SetBuildType() add_executable(ze_metric_info main.cc) target_include_directories(ze_metric_info - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(ze_metric_info PUBLIC "${CMAKE_INCLUDE_PATH}") diff --git a/samples/ze_metric_query/CMakeLists.txt b/samples/ze_metric_query/CMakeLists.txt index 5661aac..ba1a72e 100644 --- a/samples/ze_metric_query/CMakeLists.txt +++ b/samples/ze_metric_query/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -8,9 +8,9 @@ SetBuildType() # Tool Library -add_library(zet_metric_query SHARED "${PROJECT_SOURCE_DIR}/../loader/init.cc" tool.cc) +add_library(zet_metric_query SHARED "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc) target_include_directories(zet_metric_query - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(zet_metric_query PUBLIC "${CMAKE_INCLUDE_PATH}") @@ -25,9 +25,9 @@ CheckForMetricsLibrary() # Loader set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=zet_metric_query") -add_executable(ze_metric_query "${PROJECT_SOURCE_DIR}/../loader/loader.cc") +add_executable(ze_metric_query "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") target_include_directories(ze_metric_query - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(UNIX) target_link_libraries(ze_metric_query dl) diff --git a/samples/ze_metric_streamer/CMakeLists.txt b/samples/ze_metric_streamer/CMakeLists.txt index 356db39..3bbf048 100644 --- a/samples/ze_metric_streamer/CMakeLists.txt +++ b/samples/ze_metric_streamer/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -9,10 +9,10 @@ SetBuildType() # Tool Library add_library(zet_metric_streamer SHARED - "${PROJECT_SOURCE_DIR}/../loader/init.cc" + "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc) target_include_directories(zet_metric_streamer - PRIVATE "${PROJECT_SOURCE_DIR}/../utils" + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils" PRIVATE "${PROJECT_SOURCE_DIR}/../ze_hot_kernels") if(CMAKE_INCLUDE_PATH) target_include_directories(zet_metric_streamer @@ -35,9 +35,9 @@ endif() # Loader set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=zet_metric_streamer") -add_executable(ze_metric_streamer "${PROJECT_SOURCE_DIR}/../loader/loader.cc") +add_executable(ze_metric_streamer "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") target_include_directories(ze_metric_streamer - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(UNIX) target_link_libraries(ze_metric_streamer dl) diff --git a/samples/ze_sysman/CMakeLists.txt b/samples/ze_sysman/CMakeLists.txt index 81a0e7d..f300a67 100644 --- a/samples/ze_sysman/CMakeLists.txt +++ b/samples/ze_sysman/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -8,7 +8,7 @@ SetBuildType() add_executable(ze_sysman main.cc) target_include_directories(ze_sysman - PRIVATE "${PROJECT_SOURCE_DIR}/../utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(ze_sysman PUBLIC "${CMAKE_INCLUDE_PATH}") diff --git a/tests/cl_hot_functions.py b/tests/cl_hot_functions.py deleted file mode 100644 index f0bffc3..0000000 --- a/tests/cl_hot_functions.py +++ /dev/null @@ -1,76 +0,0 @@ -import os -import subprocess -import sys - -import cl_gemm -import utils - -def config(path): - p = subprocess.Popen(["cmake",\ - "-DCMAKE_BUILD_TYPE=" + utils.get_build_flag(), ".."],\ - cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) - p.wait() - stdout, stderr = utils.run_process(p) - if stderr and stderr.find("CMake Error") != -1: - return stderr - return None - -def build(path): - p = subprocess.Popen(["make"], cwd = path,\ - stdout = subprocess.PIPE, stderr = subprocess.PIPE) - p.wait() - stdout, stderr = utils.run_process(p) - if stderr and stderr.lower().find("error") != -1: - return stderr - return None - -def parse(output): - lines = output.split("\n") - total_time = 0 - for line in lines: - items = line.split(",") - if len(items) != 4 or line.find("Time (ns)") != -1: - continue - function_name = items[0].strip() - call_count = int(items[1].strip()) - time = int(items[2].strip()) - if not function_name or call_count <= 0: - return False - total_time += time - if total_time <= 0: - return False - return True - -def run(path): - app_folder = utils.get_sample_build_path("cl_gemm") - app_file = os.path.join(app_folder, "cl_gemm") - p = subprocess.Popen(["./cl_hot_functions", app_file, "gpu", "1024", "1"],\ - cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) - stdout, stderr = utils.run_process(p) - if not stdout: - return stderr - if stdout.find(" CORRECT") == -1: - return stdout - if not parse(stderr): - return stderr - return None - -def main(option): - path = utils.get_sample_build_path("cl_hot_functions") - log = cl_gemm.main("gpu") - if log: - return log - log = config(path) - if log: - return log - log = build(path) - if log: - return log - log = run(path) - if log: - return log - -if __name__ == "__main__": - log = main(None) - if log: - print(log) \ No newline at end of file diff --git a/tests/run.py b/tests/run.py index 1e4faea..c5da4e1 100644 --- a/tests/run.py +++ b/tests/run.py @@ -11,8 +11,8 @@ ["cl_gemm_itt", "gpu", "cpu"], ["cl_debug_info", None], ["cl_gpu_metrics", None], - ["cl_hot_functions", None], - ["cl_hot_kernels", "gpu", "cpu", "dpc", "omp"], + ["cl_hot_functions", "cpu", "gpu", "dpc", "omp"], + ["cl_hot_kernels", "cpu", "gpu", "dpc", "omp"], ["gpu_inst_count", "cl", "ze", "dpc"], ["gpu_perfmon_read", "cl", "ze", "dpc"], ["gpu_perfmon_set", None], @@ -32,7 +32,7 @@ tools = [["gpuinfo", "-l", "-i", "-m"], ["onetrace", "-c", "-h", "-d", "-t", "--chrome-device-timeline", "--chrome-call-logging", "--chrome-device-stages", "cl", "ze", "omp"], - ["cl_tracer", "-c", "-h", "-d", "-t", "--chrome-device-timeline", "--chrome-call-logging", "--chrome-device-stages", "gpu", "dpc"], + ["cl_tracer", "-c", "-h", "-d", "-t", "--chrome-device-timeline", "--chrome-call-logging", "--chrome-device-stages", "gpu", "dpc", "omp"], ["ze_tracer", "-c", "-h", "-d", "-t", "--chrome-device-timeline", "--chrome-call-logging", "--chrome-device-stages", "dpc", "omp"]] def remove_python_cache(path): @@ -58,6 +58,8 @@ def clean(): remove_python_cache(utils.get_build_utils_path()) remove_python_cache(utils.get_script_path()) + remove_python_cache(os.path.join(utils.get_script_path(), "samples")) + remove_python_cache(os.path.join(utils.get_script_path(), "tools")) for root, subdirs, files in os.walk(utils.get_root_path()): for file in files: @@ -76,8 +78,13 @@ def test(f, name, option, istool = False): else: sys.stdout.write("Running sample test for " + name + "...") sys.stdout.flush() - module = importlib.import_module(name) + + if istool: + module = importlib.import_module("tools." + name) + else: + module = importlib.import_module("samples." + name) log = module.main(option) + if log: sys.stdout.write("FAILED\n") if option: diff --git a/tests/cl_debug_info.py b/tests/samples/cl_debug_info.py similarity index 93% rename from tests/cl_debug_info.py rename to tests/samples/cl_debug_info.py index aba5301..91b9b27 100644 --- a/tests/cl_debug_info.py +++ b/tests/samples/cl_debug_info.py @@ -2,7 +2,7 @@ import subprocess import sys -import cl_gemm +from samples import cl_gemm import utils def config(path): @@ -30,8 +30,10 @@ def run(path): p = subprocess.Popen(["./cl_debug_info", app_file, "gpu", "1024", "1"], cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) + if not stdout: + return "stdout is empty" if not stderr: - return stdout + return "stderr is empty" if stdout.find(" CORRECT") == -1: return stdout if stderr.find("__kernel") == -1 or stderr.find("for") == -1: diff --git a/tests/cl_gemm.py b/tests/samples/cl_gemm.py similarity index 96% rename from tests/cl_gemm.py rename to tests/samples/cl_gemm.py index 2944975..82c7f44 100644 --- a/tests/cl_gemm.py +++ b/tests/samples/cl_gemm.py @@ -29,6 +29,8 @@ def run(path, option): stdout, stderr = utils.run_process(p) if stderr: return stderr + if not stdout: + return "stdout is empty" if stdout.find(" CORRECT") == -1: return stdout return None diff --git a/tests/cl_gemm_inst.py b/tests/samples/cl_gemm_inst.py similarity index 96% rename from tests/cl_gemm_inst.py rename to tests/samples/cl_gemm_inst.py index 2e9a1c0..8d10dcd 100644 --- a/tests/cl_gemm_inst.py +++ b/tests/samples/cl_gemm_inst.py @@ -29,6 +29,8 @@ def run(path): stdout, stderr = utils.run_process(p) if stderr: return stderr + if not stdout: + return "stdout is empty" if stdout.find(" CORRECT") == -1: return stdout return None diff --git a/tests/cl_gemm_itt.py b/tests/samples/cl_gemm_itt.py similarity index 96% rename from tests/cl_gemm_itt.py rename to tests/samples/cl_gemm_itt.py index 33a0fe6..f6a9033 100644 --- a/tests/cl_gemm_itt.py +++ b/tests/samples/cl_gemm_itt.py @@ -29,6 +29,8 @@ def run(path, option): stdout, stderr = utils.run_process(p) if stderr: return stderr + if not stdout: + return "stdout is empty" if stdout.find(" CORRECT") == -1: return stdout return None diff --git a/tests/cl_gpu_metrics.py b/tests/samples/cl_gpu_metrics.py similarity index 95% rename from tests/cl_gpu_metrics.py rename to tests/samples/cl_gpu_metrics.py index 1e2f754..7e47be5 100644 --- a/tests/cl_gpu_metrics.py +++ b/tests/samples/cl_gpu_metrics.py @@ -2,7 +2,7 @@ import subprocess import sys -import cl_gemm +from samples import cl_gemm import utils def config(path): @@ -55,7 +55,9 @@ def run(path): cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) if not stdout: - return stderr + return "stdout is empty" + if not stderr: + return "stderr is empty" if stdout.find(" CORRECT") == -1: return stdout if not parse(stderr): diff --git a/tests/samples/cl_hot_functions.py b/tests/samples/cl_hot_functions.py new file mode 100644 index 0000000..ffd668f --- /dev/null +++ b/tests/samples/cl_hot_functions.py @@ -0,0 +1,110 @@ +import os +import subprocess +import sys + +from samples import cl_gemm +from samples import dpc_gemm +from samples import omp_gemm +import utils + +def config(path): + p = subprocess.Popen(["cmake",\ + "-DCMAKE_BUILD_TYPE=" + utils.get_build_flag(), ".."],\ + cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + p.wait() + stdout, stderr = utils.run_process(p) + if stderr and stderr.find("CMake Error") != -1: + return stderr + return None + +def build(path): + p = subprocess.Popen(["make"], cwd = path,\ + stdout = subprocess.PIPE, stderr = subprocess.PIPE) + p.wait() + stdout, stderr = utils.run_process(p) + if stderr and stderr.lower().find("error") != -1: + return stderr + return None + +def parse(output): + lines = output.split("\n") + total_time = 0 + for line in lines: + items = line.split(",") + if len(items) != 7 or line.find("Time (ns)") != -1: + continue + function_name = items[0].strip() + call_count = int(items[1].strip()) + time = int(items[2].strip()) + if not function_name or call_count <= 0: + return False + total_time += time + if total_time <= 0: + return False + return True + +def run(path, option): + if option == "dpc": + app_folder = utils.get_sample_build_path("dpc_gemm") + app_file = os.path.join(app_folder, "dpc_gemm") + option = "cpu" + p = subprocess.Popen(["./cl_hot_functions", app_file, option, "1024", "1"],\ + cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + elif option == "omp": + app_folder = utils.get_sample_build_path("omp_gemm") + app_file = os.path.join(app_folder, "omp_gemm") + option = "gpu" + e = utils.add_env(None, "LIBOMPTARGET_PLUGIN", "OPENCL") + p = subprocess.Popen(["./cl_hot_functions", app_file, option, "1024", "1"],\ + env = e, cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + else: + app_folder = utils.get_sample_build_path("cl_gemm") + app_file = os.path.join(app_folder, "cl_gemm") + p = subprocess.Popen(["./cl_hot_functions", app_file, option, "1024", "1"],\ + cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + stdout, stderr = utils.run_process(p) + if not stdout: + return "stdout is empty" + if not stderr: + return "stderr is empty" + if stdout.find(" CORRECT") == -1: + return stdout + if not parse(stderr): + return stderr + return None + +def main(option): + path = utils.get_sample_build_path("cl_hot_functions") + if option == "dpc": + log = dpc_gemm.main("cpu") + if log: + return log + elif option == "omp": + log = omp_gemm.main("gpu") + if log: + return log + else: + log = cl_gemm.main(option) + if log: + return log + log = config(path) + if log: + return log + log = build(path) + if log: + return log + log = run(path, option) + if log: + return log + +if __name__ == "__main__": + option = "gpu" + if len(sys.argv) > 1 and sys.argv[1] == "cpu": + option = "cpu" + if len(sys.argv) > 1 and sys.argv[1] == "dpc": + option = "dpc" + if len(sys.argv) > 1 and sys.argv[1] == "omp": + option = "omp" + log = main(option) + if log: + print(log) \ No newline at end of file diff --git a/tests/cl_hot_kernels.py b/tests/samples/cl_hot_kernels.py similarity index 94% rename from tests/cl_hot_kernels.py rename to tests/samples/cl_hot_kernels.py index fa05bfe..d632dc3 100644 --- a/tests/cl_hot_kernels.py +++ b/tests/samples/cl_hot_kernels.py @@ -2,9 +2,9 @@ import subprocess import sys -import cl_gemm -import dpc_gemm -import omp_gemm +from samples import cl_gemm +from samples import dpc_gemm +from samples import omp_gemm import utils def config(path): @@ -64,7 +64,9 @@ def run(path, option): cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) if not stdout: - return stderr + return "stdout is empty" + if not stderr: + return "stderr is empty" if stdout.find(" CORRECT") == -1: return stdout if not parse(stderr): diff --git a/tests/dpc_gemm.py b/tests/samples/dpc_gemm.py similarity index 96% rename from tests/dpc_gemm.py rename to tests/samples/dpc_gemm.py index d0683e4..69d7904 100644 --- a/tests/dpc_gemm.py +++ b/tests/samples/dpc_gemm.py @@ -29,6 +29,8 @@ def run(path, option): stdout, stderr = utils.run_process(p) if stderr: return stderr + if not stdout: + return "stdout is empty" if stdout.find(" CORRECT") == -1: return stdout return None diff --git a/tests/dpc_info.py b/tests/samples/dpc_info.py similarity index 96% rename from tests/dpc_info.py rename to tests/samples/dpc_info.py index e608185..5ab22a2 100644 --- a/tests/dpc_info.py +++ b/tests/samples/dpc_info.py @@ -55,7 +55,9 @@ def run(path, option): cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) if stderr: - return stdout + return stderr + if not stdout: + return "stdout is empty" if not parse(stdout, option): return stdout return None diff --git a/tests/gpu_inst_count.py b/tests/samples/gpu_inst_count.py similarity index 94% rename from tests/gpu_inst_count.py rename to tests/samples/gpu_inst_count.py index aa2a137..064f79f 100644 --- a/tests/gpu_inst_count.py +++ b/tests/samples/gpu_inst_count.py @@ -2,9 +2,9 @@ import subprocess import sys -import dpc_gemm -import cl_gemm -import ze_gemm +from samples import dpc_gemm +from samples import cl_gemm +from samples import ze_gemm import utils def config(path): @@ -62,8 +62,10 @@ def run(path, option): p = subprocess.Popen(["./gpu_inst_count", app_file, "gpu", "1024", "1"],\ cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) + if not stdout: + return "stdout is empty" if not stderr: - return stdout + return "stderr is empty" if stdout.find(" CORRECT") == -1: return stdout if stderr.find("add") == -1 or stderr.find("mov") == -1 or stderr.find("send") == -1: diff --git a/tests/gpu_perfmon_read.py b/tests/samples/gpu_perfmon_read.py similarity index 94% rename from tests/gpu_perfmon_read.py rename to tests/samples/gpu_perfmon_read.py index c0839e0..8895aa2 100644 --- a/tests/gpu_perfmon_read.py +++ b/tests/samples/gpu_perfmon_read.py @@ -2,9 +2,9 @@ import subprocess import sys -import cl_gemm -import dpc_gemm -import ze_gemm +from samples import cl_gemm +from samples import dpc_gemm +from samples import ze_gemm import utils def config(path): @@ -64,8 +64,10 @@ def run(path, option): p = subprocess.Popen(["./gpu_perfmon_read", app_file, "gpu", "1024", "1"],\ cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) + if not stdout: + return "stdout is empty" if not stderr: - return stdout + return "stderr is empty" if stdout.find(" CORRECT") == -1: return stdout if stderr.find("add") == -1 or stderr.find("mov") == -1 or stderr.find("send") == -1: diff --git a/tests/gpu_perfmon_set.py b/tests/samples/gpu_perfmon_set.py similarity index 96% rename from tests/gpu_perfmon_set.py rename to tests/samples/gpu_perfmon_set.py index 38bd97d..e6dde44 100644 --- a/tests/gpu_perfmon_set.py +++ b/tests/samples/gpu_perfmon_set.py @@ -29,6 +29,8 @@ def run(path): stdout, stderr = utils.run_process(p) if stderr: return stderr + if not stdout: + return "stdout is empty" if stdout.find("GPU PefMon configuration is completed") == -1: return stdout return None diff --git a/tests/omp_gemm.py b/tests/samples/omp_gemm.py similarity index 96% rename from tests/omp_gemm.py rename to tests/samples/omp_gemm.py index a7e6c43..196dbef 100644 --- a/tests/omp_gemm.py +++ b/tests/samples/omp_gemm.py @@ -29,6 +29,8 @@ def run(path, option): stdout, stderr = utils.run_process(p) if stderr: return stderr + if not stdout: + return "stdout is empty" if stdout.find(" CORRECT") == -1: return stdout return None diff --git a/tests/omp_hot_regions.py b/tests/samples/omp_hot_regions.py similarity index 95% rename from tests/omp_hot_regions.py rename to tests/samples/omp_hot_regions.py index c74549a..bcd0cc7 100644 --- a/tests/omp_hot_regions.py +++ b/tests/samples/omp_hot_regions.py @@ -2,7 +2,7 @@ import subprocess import sys -import omp_gemm +from samples import omp_gemm import utils def config(path): @@ -49,8 +49,10 @@ def run(path, option): p = subprocess.Popen([app_file, option, "1024", "1"], env = e,\ cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) + if not stdout: + return "stdout is empty" if not stderr: - return stdout + return "stderr is empty" if stdout.find(" CORRECT") == -1: return stdout if stderr == "gpu" and stderr.find("Target") == -1: diff --git a/tests/ze_debug_info.py b/tests/samples/ze_debug_info.py similarity index 93% rename from tests/ze_debug_info.py rename to tests/samples/ze_debug_info.py index 80f6b31..a5b952a 100644 --- a/tests/ze_debug_info.py +++ b/tests/samples/ze_debug_info.py @@ -2,8 +2,8 @@ import subprocess import sys -import ze_gemm -import dpc_gemm +from samples import ze_gemm +from samples import dpc_gemm import utils def config(path): @@ -37,8 +37,10 @@ def run(path, option): p = subprocess.Popen(["./ze_debug_info", app_file, "1024", "1"], cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) + if not stdout: + return "stdout is empty" if not stderr: - return stdout + return "stderr is empty" if stdout.find(" CORRECT") == -1: return stdout if stderr.find("for") == -1: diff --git a/tests/ze_gemm.py b/tests/samples/ze_gemm.py similarity index 96% rename from tests/ze_gemm.py rename to tests/samples/ze_gemm.py index 3301c9f..3ca1972 100644 --- a/tests/ze_gemm.py +++ b/tests/samples/ze_gemm.py @@ -29,6 +29,8 @@ def run(path): stdout, stderr = utils.run_process(p) if stderr: return stderr + if not stdout: + return "stdout is empty" if stdout.find(" CORRECT") == -1: return stdout return None diff --git a/tests/ze_hot_functions.py b/tests/samples/ze_hot_functions.py similarity index 94% rename from tests/ze_hot_functions.py rename to tests/samples/ze_hot_functions.py index bca1df9..b9b11ca 100644 --- a/tests/ze_hot_functions.py +++ b/tests/samples/ze_hot_functions.py @@ -2,9 +2,9 @@ import subprocess import sys -import dpc_gemm -import omp_gemm -import ze_gemm +from samples import dpc_gemm +from samples import omp_gemm +from samples import ze_gemm import utils def config(path): @@ -61,7 +61,9 @@ def run(path, option): cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) if not stdout: - return stderr + return "stdout is empty" + if not stderr: + return "stderr is empty" if stdout.find(" CORRECT") == -1: return stdout if not parse(stderr): diff --git a/tests/ze_hot_kernels.py b/tests/samples/ze_hot_kernels.py similarity index 94% rename from tests/ze_hot_kernels.py rename to tests/samples/ze_hot_kernels.py index fd73296..c8045cb 100644 --- a/tests/ze_hot_kernels.py +++ b/tests/samples/ze_hot_kernels.py @@ -2,9 +2,9 @@ import subprocess import sys -import dpc_gemm -import omp_gemm -import ze_gemm +from samples import dpc_gemm +from samples import omp_gemm +from samples import ze_gemm import utils def config(path): @@ -61,7 +61,9 @@ def run(path, option): cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) if not stdout: - return stderr + return "stdout is empty" + if not stderr: + return "stderr is empty" if stdout.find(" CORRECT") == -1: return stdout if not parse(stderr): diff --git a/tests/ze_info.py b/tests/samples/ze_info.py similarity index 97% rename from tests/ze_info.py rename to tests/samples/ze_info.py index 7543a7a..84b33ea 100644 --- a/tests/ze_info.py +++ b/tests/samples/ze_info.py @@ -62,7 +62,9 @@ def run(path, option): cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) if stderr: - return stdout + return stderr + if not stdout: + return "stdout is empty" if not parse(stdout, option): return stdout return None diff --git a/tests/ze_metric_info.py b/tests/samples/ze_metric_info.py similarity index 97% rename from tests/ze_metric_info.py rename to tests/samples/ze_metric_info.py index f4f6c5c..beb7a89 100644 --- a/tests/ze_metric_info.py +++ b/tests/samples/ze_metric_info.py @@ -54,6 +54,8 @@ def run(path): stdout, stderr = utils.run_process(p) if stderr: return stderr + if not stdout: + return "stdout is empty" if not parse(stdout): return stdout return None diff --git a/tests/ze_metric_query.py b/tests/samples/ze_metric_query.py similarity index 95% rename from tests/ze_metric_query.py rename to tests/samples/ze_metric_query.py index 724722e..47ce963 100644 --- a/tests/ze_metric_query.py +++ b/tests/samples/ze_metric_query.py @@ -2,8 +2,8 @@ import subprocess import sys -import dpc_gemm -import ze_gemm +from samples import dpc_gemm +from samples import ze_gemm import utils def config(path): @@ -62,7 +62,9 @@ def run(path, option): cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) if not stdout: - return stderr + return "stdout is empty" + if not stderr: + return "stderr is empty" if stdout.find(" CORRECT") == -1: return stdout if not parse(stderr): diff --git a/tests/ze_metric_streamer.py b/tests/samples/ze_metric_streamer.py similarity index 95% rename from tests/ze_metric_streamer.py rename to tests/samples/ze_metric_streamer.py index 082e2fb..1352336 100644 --- a/tests/ze_metric_streamer.py +++ b/tests/samples/ze_metric_streamer.py @@ -2,8 +2,8 @@ import subprocess import sys -import ze_gemm -import dpc_gemm +from samples import ze_gemm +from samples import dpc_gemm import utils def config(path): @@ -62,7 +62,9 @@ def run(path, option): cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) if not stdout: - return stderr + return "stdout is empty" + if not stderr: + return "stderr is empty" if stdout.find(" CORRECT") == -1: return stdout if not parse(stderr): diff --git a/tests/ze_sysman.py b/tests/samples/ze_sysman.py similarity index 95% rename from tests/ze_sysman.py rename to tests/samples/ze_sysman.py index ed89db3..d7879c2 100644 --- a/tests/ze_sysman.py +++ b/tests/samples/ze_sysman.py @@ -40,7 +40,9 @@ def run(path): cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) if stderr: - return stdout + return stderr + if not stdout: + return "stdout is empty" if not parse(stdout): return stdout return None diff --git a/tests/cl_tracer.py b/tests/tools/cl_tracer.py similarity index 80% rename from tests/cl_tracer.py rename to tests/tools/cl_tracer.py index 9dc910d..848a48c 100644 --- a/tests/cl_tracer.py +++ b/tests/tools/cl_tracer.py @@ -2,8 +2,9 @@ import subprocess import sys -import cl_gemm -import dpc_gemm +from samples import cl_gemm +from samples import dpc_gemm +from samples import omp_gemm import utils def config(path): @@ -31,6 +32,12 @@ def run(path, option): app_file = os.path.join(app_folder, "dpc_gemm") p = subprocess.Popen(["./cl_tracer", "-h", "-d", app_file, "cpu", "1024", "1"],\ cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + elif option == "omp": + app_folder = utils.get_sample_build_path("omp_gemm") + app_file = os.path.join(app_folder, "omp_gemm") + e = utils.add_env(None, "LIBOMPTARGET_PLUGIN", "OPENCL") + p = subprocess.Popen(["./cl_tracer", "-h", "-d", app_file, "gpu", "1024", "1"],\ + env = e, cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) else: app_folder = utils.get_sample_build_path("cl_gemm") app_file = os.path.join(app_folder, "cl_gemm") @@ -41,8 +48,10 @@ def run(path, option): p = subprocess.Popen(["./cl_tracer", option, app_file, "cpu", "1024", "1"],\ cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) + if not stdout: + return "stdout is empty" if not stderr: - return stdout + return "stderr is empty" if stdout.find(" CORRECT") == -1: return stdout if stderr.find("WARNING") != -1: @@ -53,6 +62,8 @@ def main(option): path = utils.get_tool_build_path("cl_tracer") if option == "dpc": log = dpc_gemm.main("cpu") + elif option == "omp": + log = omp_gemm.main("gpu") elif option == "gpu": log = cl_gemm.main("gpu") else: @@ -87,6 +98,8 @@ def main(option): option = "gpu" if len(sys.argv) > 1 and sys.argv[1] == "dpc": option = "dpc" + if len(sys.argv) > 1 and sys.argv[1] == "omp": + option = "omp" log = main(option) if log: print(log) \ No newline at end of file diff --git a/tests/gpuinfo.py b/tests/tools/gpuinfo.py similarity index 97% rename from tests/gpuinfo.py rename to tests/tools/gpuinfo.py index b063d6b..f4c6982 100644 --- a/tests/gpuinfo.py +++ b/tests/tools/gpuinfo.py @@ -83,7 +83,9 @@ def run(path, option): cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) if stderr: - return stdout + return stderr + if not stdout: + return "stdout is empty" if not parse(stdout, option): return stdout return None diff --git a/tests/onetrace.py b/tests/tools/onetrace.py similarity index 94% rename from tests/onetrace.py rename to tests/tools/onetrace.py index bf08e10..0d5cd32 100644 --- a/tests/onetrace.py +++ b/tests/tools/onetrace.py @@ -2,10 +2,10 @@ import subprocess import sys -import cl_gemm -import dpc_gemm -import omp_gemm -import ze_gemm +from samples import cl_gemm +from samples import dpc_gemm +from samples import omp_gemm +from samples import ze_gemm import utils def config(path): @@ -49,8 +49,10 @@ def run(path, option): p = subprocess.Popen(["./onetrace", option, app_file, "gpu", "1024", "1"],\ cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) + if not stdout: + return "stdout is empty" if not stderr: - return stdout + return "stderr is empty" if stdout.find(" CORRECT") == -1: return stdout if stderr.find("WARNING") != -1: diff --git a/tests/ze_tracer.py b/tests/tools/ze_tracer.py similarity index 94% rename from tests/ze_tracer.py rename to tests/tools/ze_tracer.py index 1bec083..e744786 100644 --- a/tests/ze_tracer.py +++ b/tests/tools/ze_tracer.py @@ -2,9 +2,9 @@ import subprocess import sys -import dpc_gemm -import omp_gemm -import ze_gemm +from samples import dpc_gemm +from samples import omp_gemm +from samples import ze_gemm import utils def config(path): @@ -43,8 +43,10 @@ def run(path, option): p = subprocess.Popen(["./ze_tracer", option, app_file, "1024", "1"],\ cwd = path, stdout = subprocess.PIPE, stderr = subprocess.PIPE) stdout, stderr = utils.run_process(p) + if not stdout: + return "stdout is empty" if not stderr: - return stdout + return "stderr is empty" if stdout.find(" CORRECT") == -1: return stdout if stderr.find("WARNING") != -1: diff --git a/tests/utils.py b/tests/utils.py index f3178e8..72bfed9 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -31,14 +31,7 @@ def get_tool_build_path(name): def get_build_utils_path(): head, tail = os.path.split(get_script_path()) - path = os.path.join(head, "samples") - path = os.path.join(path, "build_utils") - return path - -def get_samples_utils_path(): - head, tail = os.path.split(get_script_path()) - path = os.path.join(head, "samples") - path = os.path.join(path, "utils") + path = os.path.join(head, "build_utils") return path def get_build_flag(): diff --git a/tools/cl_tracer/CMakeLists.txt b/tools/cl_tracer/CMakeLists.txt index 9360ef1..0ce314f 100644 --- a/tools/cl_tracer/CMakeLists.txt +++ b/tools/cl_tracer/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../../samples/build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -9,7 +9,7 @@ SetBuildType() # Tool Library add_library(clt_tracer SHARED - "${PROJECT_SOURCE_DIR}/../../samples/loader/init.cc" + "${PROJECT_SOURCE_DIR}/../../loader/init.cc" "${PROJECT_SOURCE_DIR}/../utils/correlator.cc" "${PROJECT_SOURCE_DIR}/../utils/logger.cc" trace_guard.cc @@ -17,7 +17,7 @@ add_library(clt_tracer SHARED target_include_directories(clt_tracer PRIVATE "${PROJECT_SOURCE_DIR}" PRIVATE "${PROJECT_SOURCE_DIR}/../utils" - PRIVATE "${PROJECT_SOURCE_DIR}/../../samples/utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(clt_tracer PUBLIC "${CMAKE_INCLUDE_PATH}") @@ -37,9 +37,9 @@ GetOpenCLTracingHeaders(clt_tracer) # Loader set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=clt_tracer") -add_executable(cl_tracer "${PROJECT_SOURCE_DIR}/../../samples/loader/loader.cc") +add_executable(cl_tracer "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") target_include_directories(cl_tracer - PRIVATE "${PROJECT_SOURCE_DIR}/../../samples/utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(UNIX) target_link_libraries(cl_tracer dl) diff --git a/tools/cl_tracer/cl_kernel_collector.h b/tools/cl_tracer/cl_kernel_collector.h index 4e78381..4173b22 100644 --- a/tools/cl_tracer/cl_kernel_collector.h +++ b/tools/cl_tracer/cl_kernel_collector.h @@ -260,6 +260,9 @@ class ClKernelCollector { std::string name, uint64_t time, size_t simd_width, size_t bytes_transferred) { PTI_ASSERT(!name.empty()); + PTI_ASSERT(time > 0); + + const std::lock_guard lock(lock_); if (kernel_info_map_.count(name) == 0) { kernel_info_map_[name] = { time, time, time, 1, simd_width, bytes_transferred}; @@ -281,6 +284,7 @@ class ClKernelCollector { void AddKernelInterval(std::string name, uint64_t start, uint64_t end) { PTI_ASSERT(!name.empty()); PTI_ASSERT(start < end); + const std::lock_guard lock(lock_); kernel_interval_list_.push_back({name, start, end}); } diff --git a/tools/gpuinfo/CMakeLists.txt b/tools/gpuinfo/CMakeLists.txt index 581b8e7..c192605 100644 --- a/tools/gpuinfo/CMakeLists.txt +++ b/tools/gpuinfo/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../../samples/build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -8,7 +8,7 @@ SetBuildType() add_executable(gpuinfo main.cc) target_include_directories(gpuinfo - PRIVATE "${PROJECT_SOURCE_DIR}/../../samples/utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(gpuinfo PUBLIC "${CMAKE_INCLUDE_PATH}") diff --git a/tools/onetrace/CMakeLists.txt b/tools/onetrace/CMakeLists.txt index d4b5268..aacafd3 100644 --- a/tools/onetrace/CMakeLists.txt +++ b/tools/onetrace/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../../samples/build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -9,7 +9,7 @@ SetBuildType() # Tool Library add_library(onetrace_tool SHARED - "${PROJECT_SOURCE_DIR}/../../samples/loader/init.cc" + "${PROJECT_SOURCE_DIR}/../../loader/init.cc" "${PROJECT_SOURCE_DIR}/../cl_tracer/trace_guard.cc" "${PROJECT_SOURCE_DIR}/../utils/correlator.cc" "${PROJECT_SOURCE_DIR}/../utils/logger.cc" @@ -17,7 +17,7 @@ add_library(onetrace_tool SHARED target_include_directories(onetrace_tool PRIVATE "${PROJECT_SOURCE_DIR}" PRIVATE "${PROJECT_SOURCE_DIR}/../utils" - PRIVATE "${PROJECT_SOURCE_DIR}/../../samples/utils" + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils" PRIVATE "${PROJECT_SOURCE_DIR}/../cl_tracer" PRIVATE "${PROJECT_SOURCE_DIR}/../ze_tracer") if(CMAKE_INCLUDE_PATH) @@ -49,9 +49,9 @@ endif() # Loader set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=onetrace_tool") -add_executable(onetrace "${PROJECT_SOURCE_DIR}/../../samples/loader/loader.cc") +add_executable(onetrace "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") target_include_directories(onetrace - PRIVATE "${PROJECT_SOURCE_DIR}/../../samples/utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(UNIX) target_link_libraries(onetrace dl) diff --git a/tools/ze_tracer/CMakeLists.txt b/tools/ze_tracer/CMakeLists.txt index 18529d5..9680034 100644 --- a/tools/ze_tracer/CMakeLists.txt +++ b/tools/ze_tracer/CMakeLists.txt @@ -1,4 +1,4 @@ -include("../../samples/build_utils/CMakeLists.txt") +include("../../build_utils/CMakeLists.txt") SetRequiredCMakeVersion() cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) @@ -9,14 +9,14 @@ SetBuildType() # Tool Library add_library(zet_tracer SHARED - "${PROJECT_SOURCE_DIR}/../../samples/loader/init.cc" + "${PROJECT_SOURCE_DIR}/../../loader/init.cc" "${PROJECT_SOURCE_DIR}/../utils/correlator.cc" "${PROJECT_SOURCE_DIR}/../utils/logger.cc" tool.cc) target_include_directories(zet_tracer PRIVATE "${PROJECT_SOURCE_DIR}" PRIVATE "${PROJECT_SOURCE_DIR}/../utils" - PRIVATE "${PROJECT_SOURCE_DIR}/../../samples/utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(CMAKE_INCLUDE_PATH) target_include_directories(zet_tracer PUBLIC "${CMAKE_INCLUDE_PATH}") @@ -41,9 +41,9 @@ endif() # Loader set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=zet_tracer") -add_executable(ze_tracer "${PROJECT_SOURCE_DIR}/../../samples/loader/loader.cc") +add_executable(ze_tracer "${PROJECT_SOURCE_DIR}/../../loader/loader.cc") target_include_directories(ze_tracer - PRIVATE "${PROJECT_SOURCE_DIR}/../../samples/utils") + PRIVATE "${PROJECT_SOURCE_DIR}/../../utils") if(UNIX) target_link_libraries(ze_tracer dl) diff --git a/samples/utils/cl_api_tracer.h b/utils/cl_api_tracer.h similarity index 96% rename from samples/utils/cl_api_tracer.h rename to utils/cl_api_tracer.h index e593904..d9649aa 100644 --- a/samples/utils/cl_api_tracer.h +++ b/utils/cl_api_tracer.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_CL_API_TRACER_H_ -#define PTI_SAMPLES_UTILS_CL_API_TRACER_H_ +#ifndef PTI_UTILS_CL_API_TRACER_H_ +#define PTI_UTILS_CL_API_TRACER_H_ #include @@ -141,4 +141,4 @@ class ClApiTracer { decltype(clGetTracingStateINTEL)* clGetTracingState_ = nullptr; }; -#endif // PTI_SAMPLES_UTILS_CL_API_TRACER_H_ \ No newline at end of file +#endif // PTI_UTILS_CL_API_TRACER_H_ \ No newline at end of file diff --git a/samples/utils/cl_utils.h b/utils/cl_utils.h similarity index 98% rename from samples/utils/cl_utils.h rename to utils/cl_utils.h index a246f65..bb3344e 100644 --- a/samples/utils/cl_utils.h +++ b/utils/cl_utils.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_CL_UTILS_H_ -#define PTI_SAMPLES_UTILS_CL_UTILS_H_ +#ifndef PTI_UTILS_CL_UTILS_H_ +#define PTI_UTILS_CL_UTILS_H_ #include @@ -332,4 +332,4 @@ inline cl_ulong GetCpuTimestamp() { } // namespace cl } // namespace utils -#endif // PTI_SAMPLES_UTILS_CL_UTILS_H_ \ No newline at end of file +#endif // PTI_UTILS_CL_UTILS_H_ \ No newline at end of file diff --git a/samples/utils/debug_abbrev_parser.h b/utils/debug_abbrev_parser.h similarity index 93% rename from samples/utils/debug_abbrev_parser.h rename to utils/debug_abbrev_parser.h index a69597a..30b27e8 100644 --- a/samples/utils/debug_abbrev_parser.h +++ b/utils/debug_abbrev_parser.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_DEBUG_ABBREV_PARSER_H_ -#define PTI_SAMPLES_UTILS_DEBUG_ABBREV_PARSER_H_ +#ifndef PTI_UTILS_DEBUG_ABBREV_PARSER_H_ +#define PTI_UTILS_DEBUG_ABBREV_PARSER_H_ #include "dwarf.h" #include "leb128.h" @@ -85,4 +85,4 @@ class DebugAbbrevParser { uint32_t size_; }; -#endif // PTI_SAMPLES_UTILS_DEBUG_ABBREV_PARSER_H_ \ No newline at end of file +#endif // PTI_UTILS_DEBUG_ABBREV_PARSER_H_ \ No newline at end of file diff --git a/samples/utils/debug_info_parser.h b/utils/debug_info_parser.h similarity index 95% rename from samples/utils/debug_info_parser.h rename to utils/debug_info_parser.h index 4b494fa..89f41c6 100644 --- a/samples/utils/debug_info_parser.h +++ b/utils/debug_info_parser.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_DEBUG_INFO_PARSER_H_ -#define PTI_SAMPLES_UTILS_DEBUG_INFO_PARSER_H_ +#ifndef PTI_UTILS_DEBUG_INFO_PARSER_H_ +#define PTI_UTILS_DEBUG_INFO_PARSER_H_ #include "dwarf.h" #include "leb128.h" @@ -104,4 +104,4 @@ class DebugInfoParser { uint32_t size_; }; -#endif // PTI_SAMPLES_UTILS_DEBUG_INFO_PARSER_H_ \ No newline at end of file +#endif // PTI_UTILS_DEBUG_INFO_PARSER_H_ \ No newline at end of file diff --git a/samples/utils/debug_line_parser.h b/utils/debug_line_parser.h similarity index 96% rename from samples/utils/debug_line_parser.h rename to utils/debug_line_parser.h index acb85f6..5940103 100644 --- a/samples/utils/debug_line_parser.h +++ b/utils/debug_line_parser.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_DEBUG_LINE_PARSER_H_ -#define PTI_SAMPLES_UTILS_DEBUG_LINE_PARSER_H_ +#ifndef PTI_UTILS_DEBUG_LINE_PARSER_H_ +#define PTI_UTILS_DEBUG_LINE_PARSER_H_ #include @@ -137,4 +137,4 @@ class DebugLineParser { uint32_t size_; }; -#endif // PTI_SAMPLES_UTILS_DEBUG_LINE_PARSER_H_ \ No newline at end of file +#endif // PTI_UTILS_DEBUG_LINE_PARSER_H_ \ No newline at end of file diff --git a/samples/utils/dwarf.h b/utils/dwarf.h similarity index 94% rename from samples/utils/dwarf.h rename to utils/dwarf.h index 0813446..bdf7214 100644 --- a/samples/utils/dwarf.h +++ b/utils/dwarf.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_DWARF_H_ -#define PTI_SAMPLES_UTILS_DWARF_H_ +#ifndef PTI_UTILS_DWARF_H_ +#define PTI_UTILS_DWARF_H_ #include #include @@ -71,4 +71,4 @@ struct DwarfAttribute { using DwarfCompUnitMap = std::map >; -#endif // PTI_SAMPLES_UTILS_DWARF_H_ \ No newline at end of file +#endif // PTI_UTILS_DWARF_H_ \ No newline at end of file diff --git a/samples/utils/dwarf_state_machine.h b/utils/dwarf_state_machine.h similarity index 97% rename from samples/utils/dwarf_state_machine.h rename to utils/dwarf_state_machine.h index 9eabc73..3737285 100644 --- a/samples/utils/dwarf_state_machine.h +++ b/utils/dwarf_state_machine.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_DWARF_STATE_MACHINE_H_ -#define PTI_SAMPLES_UTILS_DWARF_STATE_MACHINE_H_ +#ifndef PTI_UTILS_DWARF_STATE_MACHINE_H_ +#define PTI_UTILS_DWARF_STATE_MACHINE_H_ #include "dwarf.h" #include "leb128.h" @@ -209,4 +209,4 @@ class DwarfStateMachine { std::vector line_info_; }; -#endif // PTI_SAMPLES_UTILS_DWARF_STATE_MACHINE_H_ \ No newline at end of file +#endif // PTI_UTILS_DWARF_STATE_MACHINE_H_ \ No newline at end of file diff --git a/samples/utils/elf.h b/utils/elf.h similarity index 88% rename from samples/utils/elf.h rename to utils/elf.h index a11bc75..06c273b 100644 --- a/samples/utils/elf.h +++ b/utils/elf.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_ELF_H_ -#define PTI_SAMPLES_UTILS_ELF_H_ +#ifndef PTI_UTILS_ELF_H_ +#define PTI_UTILS_ELF_H_ #include @@ -42,4 +42,4 @@ struct Elf64SectionHeader { uint64_t entsize; }; -#endif // PTI_SAMPLES_UTILS_ELF_H_ \ No newline at end of file +#endif // PTI_UTILS_ELF_H_ \ No newline at end of file diff --git a/samples/utils/elf_parser.h b/utils/elf_parser.h similarity index 97% rename from samples/utils/elf_parser.h rename to utils/elf_parser.h index b05a610..bb07faa 100644 --- a/samples/utils/elf_parser.h +++ b/utils/elf_parser.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_ELF_PARSER_H_ -#define PTI_SAMPLES_UTILS_ELF_PARSER_H_ +#ifndef PTI_UTILS_ELF_PARSER_H_ +#define PTI_UTILS_ELF_PARSER_H_ #include @@ -175,4 +175,4 @@ class ElfParser { uint32_t size_ = 0; }; -#endif // PTI_SAMPLES_UTILS_ELF_PARSER_H_ \ No newline at end of file +#endif // PTI_UTILS_ELF_PARSER_H_ \ No newline at end of file diff --git a/samples/utils/gen_binary_decoder.h b/utils/gen_binary_decoder.h similarity index 89% rename from samples/utils/gen_binary_decoder.h rename to utils/gen_binary_decoder.h index dc8a42a..6cb3efe 100644 --- a/samples/utils/gen_binary_decoder.h +++ b/utils/gen_binary_decoder.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_GEN_BINARY_DECODER_H_ -#define PTI_SAMPLES_UTILS_GEN_BINARY_DECODER_H_ +#ifndef PTI_UTILS_GEN_BINARY_DECODER_H_ +#define PTI_UTILS_GEN_BINARY_DECODER_H_ #include #include @@ -57,4 +57,4 @@ class GenBinaryDecoder { KernelView kernel_view_; }; -#endif // PTI_SAMPLES_UTILS_GEN_BINARY_DECODER_H_ \ No newline at end of file +#endif // PTI_UTILS_GEN_BINARY_DECODER_H_ \ No newline at end of file diff --git a/samples/utils/gen_symbols_decoder.h b/utils/gen_symbols_decoder.h similarity index 95% rename from samples/utils/gen_symbols_decoder.h rename to utils/gen_symbols_decoder.h index 2bb6d41..e45e9f0 100644 --- a/samples/utils/gen_symbols_decoder.h +++ b/utils/gen_symbols_decoder.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_GEN_SYMBOLS_DECODER_H_ -#define PTI_SAMPLES_UTILS_GEN_SYMBOLS_DECODER_H_ +#ifndef PTI_UTILS_GEN_SYMBOLS_DECODER_H_ +#define PTI_UTILS_GEN_SYMBOLS_DECODER_H_ #include @@ -103,4 +103,4 @@ class GenSymbolsDecoder { size_t size_ = 0; }; -#endif // PTI_SAMPLES_UTILS_GEN_SYMBOLS_DECODER_H_ \ No newline at end of file +#endif // PTI_UTILS_GEN_SYMBOLS_DECODER_H_ \ No newline at end of file diff --git a/samples/utils/gtpin_utils.h b/utils/gtpin_utils.h similarity index 91% rename from samples/utils/gtpin_utils.h rename to utils/gtpin_utils.h index 2f06e64..16d5d04 100644 --- a/samples/utils/gtpin_utils.h +++ b/utils/gtpin_utils.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_GTPIN_UTILS_H_ -#define PTI_SAMPLES_UTILS_GTPIN_UTILS_H_ +#ifndef PTI_UTILS_GTPIN_UTILS_H_ +#define PTI_UTILS_GTPIN_UTILS_H_ #include #include @@ -56,4 +56,4 @@ inline void KnobAddInt(const char* name, int value) { } // namespace gtpin } // namespace utils -#endif // PTI_SAMPLES_UTILS_GTPIN_UTILS_H_ \ No newline at end of file +#endif // PTI_UTILS_GTPIN_UTILS_H_ \ No newline at end of file diff --git a/samples/utils/i915_utils.h b/utils/i915_utils.h similarity index 94% rename from samples/utils/i915_utils.h rename to utils/i915_utils.h index 64ce1a2..c21815c 100644 --- a/samples/utils/i915_utils.h +++ b/utils/i915_utils.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_I915_UTILS_H_ -#define PTI_SAMPLES_UTILS_I915_UTILS_H_ +#ifndef PTI_UTILS_I915_UTILS_H_ +#define PTI_UTILS_I915_UTILS_H_ #if defined(__linux__) #include @@ -98,4 +98,4 @@ inline uint64_t GetGpuTimerFrequency() { } // namespace i915 } // namespace utils -#endif // PTI_SAMPLES_UTILS_I915_UTILS_H_ \ No newline at end of file +#endif // PTI_UTILS_I915_UTILS_H_ \ No newline at end of file diff --git a/samples/utils/igc_binary_decoder.h b/utils/igc_binary_decoder.h similarity index 100% rename from samples/utils/igc_binary_decoder.h rename to utils/igc_binary_decoder.h diff --git a/samples/utils/leb128.h b/utils/leb128.h similarity index 92% rename from samples/utils/leb128.h rename to utils/leb128.h index 9ca8131..846390c 100644 --- a/samples/utils/leb128.h +++ b/utils/leb128.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_LEB128_H_ -#define PTI_SAMPLES_UTILS_LEB128_H_ +#ifndef PTI_UTILS_LEB128_H_ +#define PTI_UTILS_LEB128_H_ #include @@ -71,4 +71,4 @@ inline const uint8_t* Decode32(const uint8_t* ptr, int32_t& value, } // namespace leb128 } // namespace utils -#endif // PTI_SAMPLES_UTILS_LEB128_H_ \ No newline at end of file +#endif // PTI_UTILS_LEB128_H_ \ No newline at end of file diff --git a/samples/utils/metric_device.h b/utils/metric_device.h similarity index 97% rename from samples/utils/metric_device.h rename to utils/metric_device.h index 5ee1524..2a945b0 100644 --- a/samples/utils/metric_device.h +++ b/utils/metric_device.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_METRIC_DEVICE_H_ -#define PTI_SAMPLES_UTILS_METRIC_DEVICE_H_ +#ifndef PTI_UTILS_METRIC_DEVICE_H_ +#define PTI_UTILS_METRIC_DEVICE_H_ #include @@ -194,4 +194,4 @@ class MetricDevice { SharedLibrary* lib_ = nullptr; }; -#endif // PTI_SAMPLES_UTILS_METRIC_DEVICE_H_ \ No newline at end of file +#endif // PTI_UTILS_METRIC_DEVICE_H_ \ No newline at end of file diff --git a/samples/utils/metric_utils.h b/utils/metric_utils.h similarity index 94% rename from samples/utils/metric_utils.h rename to utils/metric_utils.h index ef66203..6696e9d 100644 --- a/samples/utils/metric_utils.h +++ b/utils/metric_utils.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_METRIC_UTILS_H_ -#define PTI_SAMPLES_UTILS_METRIC_UTILS_H_ +#ifndef PTI_UTILS_METRIC_UTILS_H_ +#define PTI_UTILS_METRIC_UTILS_H_ #if defined(_WIN32) #include @@ -92,4 +92,4 @@ inline std::vector GetMDLibraryPossiblePaths() { } // namespace metrics } // namespace utils -#endif // PTI_SAMPLES_UTILS_METRIC_UTILS_H_ \ No newline at end of file +#endif // PTI_UTILS_METRIC_UTILS_H_ \ No newline at end of file diff --git a/samples/utils/pti_assert.h b/utils/pti_assert.h similarity index 73% rename from samples/utils/pti_assert.h rename to utils/pti_assert.h index 76909d2..2f2ca00 100644 --- a/samples/utils/pti_assert.h +++ b/utils/pti_assert.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_PTI_ASSERT_H_ -#define PTI_SAMPLES_UTILS_PTI_ASSERT_H_ +#ifndef PTI_UTILS_PTI_ASSERT_H_ +#define PTI_UTILS_PTI_ASSERT_H_ #ifdef NDEBUG #undef NDEBUG @@ -17,4 +17,4 @@ #define PTI_ASSERT(X) assert(X) -#endif // PTI_SAMPLES_UTILS_PTI_ASSERT_H_ \ No newline at end of file +#endif // PTI_UTILS_PTI_ASSERT_H_ \ No newline at end of file diff --git a/samples/utils/shared_library.h b/utils/shared_library.h similarity index 91% rename from samples/utils/shared_library.h rename to utils/shared_library.h index 83683f1..fbd52bc 100644 --- a/samples/utils/shared_library.h +++ b/utils/shared_library.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_SHARED_LIBRARY_H_ -#define PTI_SAMPLES_UTILS_SHARED_LIBRARY_H_ +#ifndef PTI_UTILS_SHARED_LIBRARY_H_ +#define PTI_UTILS_SHARED_LIBRARY_H_ #if defined(_WIN32) #include @@ -77,4 +77,4 @@ class SharedLibrary { #endif }; -#endif // PTI_SAMPLES_UTILS_SHARED_LIBRARY_H_ \ No newline at end of file +#endif // PTI_UTILS_SHARED_LIBRARY_H_ \ No newline at end of file diff --git a/samples/utils/utils.h b/utils/utils.h similarity index 97% rename from samples/utils/utils.h rename to utils/utils.h index 4a6397c..5d47c5e 100644 --- a/samples/utils/utils.h +++ b/utils/utils.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_UTILS_H_ -#define PTI_SAMPLES_UTILS_UTILS_H_ +#ifndef PTI_UTILS_UTILS_H_ +#define PTI_UTILS_UTILS_H_ #if defined(_WIN32) #include @@ -165,4 +165,4 @@ inline uint32_t GetTid() { } // namespace utils -#endif // PTI_SAMPLES_UTILS_UTILS_H_ \ No newline at end of file +#endif // PTI_UTILS_UTILS_H_ \ No newline at end of file diff --git a/samples/utils/ze_utils.h b/utils/ze_utils.h similarity index 98% rename from samples/utils/ze_utils.h rename to utils/ze_utils.h index 6d4f074..3568b4c 100644 --- a/samples/utils/ze_utils.h +++ b/utils/ze_utils.h @@ -4,8 +4,8 @@ // SPDX-License-Identifier: MIT // ============================================================= -#ifndef PTI_SAMPLES_UTILS_ZE_UTILS_H_ -#define PTI_SAMPLES_UTILS_ZE_UTILS_H_ +#ifndef PTI_UTILS_ZE_UTILS_H_ +#define PTI_UTILS_ZE_UTILS_H_ #include @@ -236,4 +236,4 @@ inline std::string GetKernelName(ze_kernel_handle_t kernel) { } // namespace ze } // namespace utils -#endif // PTI_SAMPLES_UTILS_ZE_UTILS_H_ \ No newline at end of file +#endif // PTI_UTILS_ZE_UTILS_H_ \ No newline at end of file