diff --git a/VERSION b/VERSION index 0f7553c..cd3d588 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.43.0 \ No newline at end of file +0.43.3 \ No newline at end of file diff --git a/build_utils/CMakeLists.txt b/build_utils/CMakeLists.txt index 0b886c6..8868722 100644 --- a/build_utils/CMakeLists.txt +++ b/build_utils/CMakeLists.txt @@ -38,7 +38,10 @@ macro(FindOpenCLLibrary TARGET) endif() else() if(DEFINED ENV{LD_LIBRARY_PATH}) - string(REPLACE ":" ";" SEARCH_LIB_PATH $ENV{LD_LIBRARY_PATH}) + string(COMPARE EQUAL "$ENV{LD_LIBRARY_PATH}" "" RESULT) + if (NOT RESULT) + string(REPLACE ":" ";" SEARCH_LIB_PATH $ENV{LD_LIBRARY_PATH}) + endif() endif() list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.1) @@ -191,7 +194,10 @@ macro(FindIGALibrary TARGET) endif() else() if(DEFINED ENV{LD_LIBRARY_PATH}) - string(REPLACE ":" ";" SEARCH_LIB_PATH $ENV{LD_LIBRARY_PATH}) + string(COMPARE EQUAL "$ENV{LD_LIBRARY_PATH}" "" RESULT) + if (NOT RESULT) + string(REPLACE ":" ";" SEARCH_LIB_PATH $ENV{LD_LIBRARY_PATH}) + endif() endif() find_library(IGA_LIB_PATH NAMES iga64 @@ -321,7 +327,10 @@ macro(CheckForMDLibrary TARGET) endif() else() if(DEFINED ENV{LD_LIBRARY_PATH}) - string(REPLACE ":" ";" SEARCH_LIB_PATH $ENV{LD_LIBRARY_PATH}) + string(COMPARE EQUAL "$ENV{LD_LIBRARY_PATH}" "" RESULT) + if (NOT RESULT) + string(REPLACE ":" ";" SEARCH_LIB_PATH $ENV{LD_LIBRARY_PATH}) + endif() endif() find_library(MD_LIB_PATH NAMES md @@ -349,7 +358,10 @@ macro(CheckForMetricsLibrary) file(GLOB_RECURSE ML_LIB_PATH "${WIN_SYS_PATH}/igdml64.dll") else() if(DEFINED ENV{LD_LIBRARY_PATH}) - string(REPLACE ":" ";" SEARCH_LIB_PATH $ENV{LD_LIBRARY_PATH}) + string(COMPARE EQUAL "$ENV{LD_LIBRARY_PATH}" "" RESULT) + if (NOT RESULT) + string(REPLACE ":" ";" SEARCH_LIB_PATH $ENV{LD_LIBRARY_PATH}) + endif() endif() find_library(ML_LIB_PATH NAMES igdml64 @@ -580,7 +592,10 @@ macro(FindL0Library TARGET) endif() else() if(DEFINED ENV{LD_LIBRARY_PATH}) - string(REPLACE ":" ";" SEARCH_LIB_PATH $ENV{LD_LIBRARY_PATH}) + 
string(COMPARE EQUAL "$ENV{LD_LIBRARY_PATH}" "" RESULT) + if (NOT RESULT) + string(REPLACE ":" ";" SEARCH_LIB_PATH $ENV{LD_LIBRARY_PATH}) + endif() endif() find_library(L0_LIB_PATH @@ -651,7 +666,10 @@ endmacro() macro(FindDRMLibrary TARGET) if(DEFINED ENV{LD_LIBRARY_PATH}) - string(REPLACE ":" ";" SEARCH_LIB_PATH $ENV{LD_LIBRARY_PATH}) + string(COMPARE EQUAL "$ENV{LD_LIBRARY_PATH}" "" RESULT) + if (NOT RESULT) + string(REPLACE ":" ";" SEARCH_LIB_PATH $ENV{LD_LIBRARY_PATH}) + endif() endif() find_library(DRM_LIB_PATH diff --git a/chapters/binary_source_correlation/LevelZero.md b/chapters/binary_source_correlation/LevelZero.md index cf57ca3..781235c 100644 --- a/chapters/binary_source_correlation/LevelZero.md +++ b/chapters/binary_source_correlation/LevelZero.md @@ -98,7 +98,7 @@ assert(status == ZE_RESULT_SUCCESS); To decode debug symbols for GPU modules one should refer to [Intel(R) Processor Graphics Compiler (IGC)](https://github.com/intel/intel-graphics-compiler) internal formats described [here](GenSymbolsDecoding.md). 
## Usage Details -- refer to oneAPI Level Zero [documentation](https://spec.oneapi.com/level-zero/latest/index.html) to learn more +- refer to oneAPI Level Zero [documentation](https://spec.oneapi.io/level-zero/latest/index.html) to learn more - look into GEN binary decoding [chapter](GenBinaryDecoding.md) to learn more on GEN binary decoding/disassembling interfaces - look into GEN symbols decoding [chapter](GenSymbolsDecoding.md) to learn more on symbols format - refer to the IGC [patch_list.h](https://github.com/intel/intel-graphics-compiler/blob/master/IGC/AdaptorOCL/ocl_igc_shared/executable_format/patch_list.h) header to learn more on module binary layout diff --git a/chapters/code_annotation/ITT.md b/chapters/code_annotation/ITT.md index fe528dc..316a292 100644 --- a/chapters/code_annotation/ITT.md +++ b/chapters/code_annotation/ITT.md @@ -1,7 +1,7 @@ # Instrumentation and Tracing Technology API (ITT API) ## Overview The [Instrumentation and Tracing Technology API (ITT API)](https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/api-support/instrumentation-and-tracing-technology-apis.html) enables the application to generate and control the collection of tracing data during its execution. -It is intended for use with [Intel(R) VTune(TM) Profiler](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/vtune-profiler.html). +It is intended for use with [Intel(R) VTune(TM) Analyzer](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/vtune-profiler.html). Instrumentation and Tracing Technology API (ITT API) provides the following capabilities: - Enable user to control collection; @@ -10,7 +10,7 @@ Instrumentation and Tracing Technology API (ITT API) provides the following capa - Enable user to specify custom synchronization primitives implemented without standard system APIs; - Support applications in C/C++ environments. 
-User applications/modules linked to the static user API library do not have a runtime dependency on a dynamic library. Therefore, they can be executed without Intel(R) VTune(TM) Profiler with close-to-zero overhead. +User applications/modules linked to the static user API library do not have a runtime dependency on a dynamic library. Therefore, they can be executed without Intel(R) VTune(TM) Analyzer with close-to-zero overhead. **Supported OS**: - Linux @@ -23,7 +23,7 @@ User applications/modules linked to the static user API library do not have a ru - [ittnotify.h](https://github.com/intel/ittapi/blob/master/include/ittnotify.h) **Needed Libraries**: -- Instrumentation and Tracing Technology (ITT) [library](https://github.com/intel/ittapi), can be installed as part of [Intel(R) VTune(TM) Profiler](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/vtune-profiler.html) +- Instrumentation and Tracing Technology (ITT) [library](https://github.com/intel/ittapi), can be installed as part of [Intel(R) VTune(TM) Analyzer](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/vtune-profiler.html) ## How To Use The following steps should be performed to enable ITT based code annotation for target application: @@ -63,7 +63,7 @@ int main() { } ``` 3. Build the application and link it with ITT library implementation. One may build ITT static library first, and then link the application with it. Another way is to add ITT sources (in particular, [ittnotify_static.c](https://github.com/intel/ittapi/blob/master/src/ittnotify/ittnotify_static.c) file) into the application directly. -4. Run the application under [Intel(R) VTune(TM) Profiler](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/vtune-profiler.html) to see the result. +4. Run the application under [Intel(R) VTune(TM) Analyzer](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/vtune-profiler.html) to see the result. 
## Usage Details - refer to Instrumentation and Tracing Technology API (ITT API) [documentation](https://software.intel.com/content/www/us/en/develop/documentation/vtune-help/top/api-support/instrumentation-and-tracing-technology-apis.html) documentation to learn more on programming interfaces diff --git a/chapters/device_activity_tracing/LevelZero.md b/chapters/device_activity_tracing/LevelZero.md index f64f6e4..4093c96 100644 --- a/chapters/device_activity_tracing/LevelZero.md +++ b/chapters/device_activity_tracing/LevelZero.md @@ -109,7 +109,7 @@ Event pool profiling does not require any additional environment variables to be ``` ## Usage Details -- refer to oneAPI Level Zero [documentation](https://spec.oneapi.com/level-zero/latest/index.html) to learn more +- refer to oneAPI Level Zero [documentation](https://spec.oneapi.io/level-zero/latest/index.html) to learn more ## Samples - [Level Zero GEMM](../../samples/ze_gemm) diff --git a/samples/cl_gemm/main.cc b/samples/cl_gemm/main.cc index d588feb..3bbb945 100644 --- a/samples/cl_gemm/main.cc +++ b/samples/cl_gemm/main.cc @@ -153,16 +153,16 @@ static void Compute(cl_device_id device, const std::vector& a, PTI_ASSERT(status == CL_SUCCESS && kernel != nullptr); for (unsigned i = 0; i < repeat_count; ++i) { - if (i == 0) { // Disable data collection for the first iteration - utils::SetEnv("PTI_DISABLE_COLLECTION", "1"); + if (i == 0) { // Enable data collection for the first iteration + utils::SetEnv("PTI_ENABLE_COLLECTION", "1"); } float eps = RunAndCheck(kernel, queue, a, b, c, size, expected_result); std::cout << "Results are " << ((eps < MAX_EPS) ? 
"" : "IN") << "CORRECT with accuracy: " << eps << std::endl; - if (i == 0) { // Enable data collection for the rest iterations - utils::SetEnv("PTI_DISABLE_COLLECTION", ""); + if (i == 0) { // Disable data collection for the rest iterations + utils::SetEnv("PTI_ENABLE_COLLECTION", ""); } } diff --git a/samples/cl_gemm_itt/README.md b/samples/cl_gemm_itt/README.md index 5af6ff9..b9ac38b 100644 --- a/samples/cl_gemm_itt/README.md +++ b/samples/cl_gemm_itt/README.md @@ -1,6 +1,6 @@ # OpenCL(TM) GEMM with Code Annotation ## Overview -This sample application performs general matrix multiplication using OpenCL(TM) CPU or GPU device. Its code is annotated with Instrumentation and Tracing Technology API (ITT API) that allows to highligh its regions-of-interest while using Intel(R) VTune(TM) Profiler. +This sample application performs general matrix multiplication using OpenCL(TM) CPU or GPU device. Its code is annotated with Instrumentation and Tracing Technology API (ITT API) that allows one to highlight its regions-of-interest while using Intel(R) VTune(TM) Analyzer. ``` OpenCL Matrix Multiplication (matrix size: 1024 x 1024, repeats 4 times) Target device: Intel(R) Gen9 HD Graphics NEO @@ -53,4 +53,4 @@ Use this command line to run the application: cl_gemm_itt.exe [cpu|gpu] [matrix_size] [repeats_count] ``` -Use [Intel(R) VTune(TM) Profiler](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/vtune-profiler.html) to profile this application in order to look into its regions-of-interest. \ No newline at end of file +Use [Intel(R) VTune(TM) Analyzer](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/vtune-profiler.html) to profile this application in order to look into its regions-of-interest. 
\ No newline at end of file diff --git a/samples/ze_gemm/main.cc b/samples/ze_gemm/main.cc index 68a600e..b8740d5 100644 --- a/samples/ze_gemm/main.cc +++ b/samples/ze_gemm/main.cc @@ -219,14 +219,14 @@ static void Compute(ze_device_handle_t device, PTI_ASSERT(status == ZE_RESULT_SUCCESS && module != nullptr); ze_kernel_desc_t kernel_desc = { - ZE_STRUCTURE_TYPE_KERNEL_DESC, nullptr, 0, "GEMM"}; + ZE_STRUCTURE_TYPE_KERNEL_DESC, nullptr, 0, "GEMM"}; ze_kernel_handle_t kernel = nullptr; status = zeKernelCreate(module, &kernel_desc, &kernel); PTI_ASSERT(status == ZE_RESULT_SUCCESS && kernel != nullptr); for (unsigned i = 0; i < repeat_count; ++i) { - if (i == 0) { // Disable data collection for the first iteration - utils::SetEnv("PTI_DISABLE_COLLECTION", "1"); + if (i == 0) { // Enable data collection for the first iteration + utils::SetEnv("PTI_ENABLE_COLLECTION", "1"); } float eps = RunAndCheck(kernel, device, context, a, b, c, @@ -234,8 +234,8 @@ static void Compute(ze_device_handle_t device, std::cout << "Results are " << ((eps < MAX_EPS) ? "" : "IN") << "CORRECT with accuracy: " << eps << std::endl; - if (i == 0) { // Enable data collection for the rest iterations - utils::SetEnv("PTI_DISABLE_COLLECTION", ""); + if (i == 0) { // Disable data collection for the rest iterations + utils::SetEnv("PTI_ENABLE_COLLECTION", ""); } } diff --git a/tools/cl_tracer/README.md b/tools/cl_tracer/README.md index e07a25c..a87bee8 100644 --- a/tools/cl_tracer/README.md +++ b/tools/cl_tracer/README.md @@ -122,13 +122,13 @@ Device Timeline (queue: 0x55a9c7e51e70): clEnqueueReadBuffer [ns] = 361479600 (q **Chrome Device Stages** mode provides alternative view for device queue where each kernel invocation is divided into stages: "queued", "sumbitted" and "execution". Can't be used with **Chrome Device Timeline**. 
-**Conditional Collection** mode allows one to disable data collection for any target interval using environment variable `PTI_DISABLE_COLLECTION`, e.g.: +**Conditional Collection** mode allows one to enable data collection for any target interval (by default collection will be disabled) using environment variable `PTI_ENABLE_COLLECTION`, e.g.: ```cpp -// Collection enabled -setenv("PTI_DISABLE_COLLECTION", "1", 1); // Collection disabled -unsetenv("PTI_DISABLE_COLLECTION"); +setenv("PTI_ENABLE_COLLECTION", "1", 1); // Collection enabled +unsetenv("PTI_ENABLE_COLLECTION"); +// Collection disabled ``` All the API calls and kernels, which submission happens while collection disabled interval, will be omitted from final results. diff --git a/tools/cl_tracer/cl_api_collector.h b/tools/cl_tracer/cl_api_collector.h index 05c1d70..1e790c7 100644 --- a/tools/cl_tracer/cl_api_collector.h +++ b/tools/cl_tracer/cl_api_collector.h @@ -243,7 +243,7 @@ class ClApiCollector { if (callback_data->site == CL_CALLBACK_SITE_ENTER) { PTI_ASSERT(collector->correlator_ != nullptr); - if (collector->correlator_->IsCollectionDisabled()) { + if (!collector->correlator_->IsCollectionEnabled()) { *reinterpret_cast(callback_data->correlationData) = 0; return; } diff --git a/tools/cl_tracer/cl_kernel_collector.h b/tools/cl_tracer/cl_kernel_collector.h index 5e9dd0f..4b95954 100644 --- a/tools/cl_tracer/cl_kernel_collector.h +++ b/tools/cl_tracer/cl_kernel_collector.h @@ -811,7 +811,7 @@ class ClKernelCollector { PTI_ASSERT(collector->correlator_ != nullptr); collector->correlator_->SetKernelId(instance->kernel_id); instance->need_to_process = - !collector->correlator_->IsCollectionDisabled(); + collector->correlator_->IsCollectionEnabled(); ClEnqueueData* enqueue_data = reinterpret_cast(data->correlationData[0]); @@ -851,7 +851,7 @@ class ClKernelCollector { PTI_ASSERT(collector->correlator_ != nullptr); collector->correlator_->SetKernelId(instance->kernel_id); instance->need_to_process = - 
!collector->correlator_->IsCollectionDisabled(); + collector->correlator_->IsCollectionEnabled(); ClEnqueueData* enqueue_data = reinterpret_cast(data->correlationData[0]); diff --git a/tools/onetrace/README.md b/tools/onetrace/README.md index 422edaf..f558b77 100644 --- a/tools/onetrace/README.md +++ b/tools/onetrace/README.md @@ -138,13 +138,13 @@ Device Timeline (queue: 0x55a9c7e51e70): clEnqueueReadBuffer [ns] = 361479600 (q **Chrome Device Stages** mode provides alternative view for device queue where each kernel invocation is divided into stages: "queued" or "appended", "sumbitted" and "execution". Can't be used with **Chrome Device Timeline**. -**Conditional Collection** mode allows one to disable data collection for any target interval using environment variable `PTI_DISABLE_COLLECTION`, e.g.: +**Conditional Collection** mode allows one to enable data collection for any target interval (by default collection will be disabled) using environment variable `PTI_ENABLE_COLLECTION`, e.g.: ```cpp -// Collection enabled -setenv("PTI_DISABLE_COLLECTION", "1", 1); // Collection disabled -unsetenv("PTI_DISABLE_COLLECTION"); +setenv("PTI_ENABLE_COLLECTION", "1", 1); // Collection enabled +unsetenv("PTI_ENABLE_COLLECTION"); +// Collection disabled ``` All the API calls and kernels, which submission happens while collection disabled interval, will be omitted from final results. 
diff --git a/tools/sysmon/main.cc b/tools/sysmon/main.cc index cb74976..432f8d9 100644 --- a/tools/sysmon/main.cc +++ b/tools/sysmon/main.cc @@ -1040,19 +1040,25 @@ void PrintTemperatureInfo( if (temp_props.type == ZES_TEMP_SENSORS_GPU) { double temp = 0.0f; status = zesTemperatureGetState(sensor_list[i], &temp); - PTI_ASSERT(status == ZE_RESULT_SUCCESS); - - std::cout << std::setw(TEXT_WIDTH) << std::left << - "Core Temperature(C)," << temp << std::endl; + std::cout << std::setw(TEXT_WIDTH) << + std::left << "Core Temperature(C),"; + if (status == ZE_RESULT_SUCCESS) { + std::cout << temp << std::endl; + } else { + std::cout << "N/A" << std::endl; + } } if (temp_props.type == ZES_TEMP_SENSORS_MEMORY) { double temp = 0.0f; status = zesTemperatureGetState(sensor_list[i], &temp); - PTI_ASSERT(status == ZE_RESULT_SUCCESS); - std::cout << std::setw(TEXT_WIDTH) << std::left << - "Memory Temperature(C)," << temp << std::endl; + "Memory Temperature(C),"; + if (status == ZE_RESULT_SUCCESS) { + std::cout << temp << std::endl; + } else { + std::cout << "N/A" << std::endl; + } } } } diff --git a/tools/utils/correlator.h b/tools/utils/correlator.h index 657e19d..850debc 100644 --- a/tools/utils/correlator.h +++ b/tools/utils/correlator.h @@ -62,11 +62,14 @@ class Correlator { kernel_id_ = kernel_id; } - bool IsCollectionDisabled() const { + bool IsCollectionEnabled() const { if (conditional_collection_) { - return !utils::GetEnv("PTI_DISABLE_COLLECTION").empty(); + std::string enabled = utils::GetEnv("PTI_ENABLE_COLLECTION"); + if (enabled.empty() || enabled == "0") { + return false; + } } - return false; + return true; } #ifdef PTI_LEVEL_ZERO diff --git a/tools/ze_tracer/README.md b/tools/ze_tracer/README.md index c70189a..71ee1b0 100644 --- a/tools/ze_tracer/README.md +++ b/tools/ze_tracer/README.md @@ -113,13 +113,13 @@ Device Timeline (queue: 0x556fa2318fc0): zeCommandListAppendMemoryCopy [ns] = 39 **Chrome Device Stages** mode provides alternative view for device queue 
where each kernel invocation is divided into stages: "appended", "sumbitted" and "execution". Can't be used with **Chrome Device Timeline**. -**Conditional Collection** mode allows one to disable data collection for any target interval using environment variable `PTI_DISABLE_COLLECTION`, e.g.: +**Conditional Collection** mode allows one to enable data collection for any target interval (by default collection will be disabled) using environment variable `PTI_ENABLE_COLLECTION`, e.g.: ```cpp -// Collection enabled -setenv("PTI_DISABLE_COLLECTION", "1", 1); // Collection disabled -unsetenv("PTI_DISABLE_COLLECTION"); +setenv("PTI_ENABLE_COLLECTION", "1", 1); // Collection enabled +unsetenv("PTI_ENABLE_COLLECTION"); +// Collection disabled ``` All the API calls and kernels, which submission happens while collection disabled interval, will be omitted from final results. diff --git a/tools/ze_tracer/gen_tracing_callbacks.py b/tools/ze_tracer/gen_tracing_callbacks.py index f08f4b4..c1bd0e9 100644 --- a/tools/ze_tracer/gen_tracing_callbacks.py +++ b/tools/ze_tracer/gen_tracing_callbacks.py @@ -321,7 +321,7 @@ def gen_enter_callback(f, func, params, enum_map): f.write(" PTI_ASSERT(collector != nullptr);\n") f.write(" PTI_ASSERT(collector->correlator_ != nullptr);\n") f.write("\n") - f.write(" if (collector->correlator_->IsCollectionDisabled()) {\n") + f.write(" if (!collector->correlator_->IsCollectionEnabled()) {\n") f.write(" *reinterpret_cast(instance_user_data) = 0;\n") f.write(" return;\n") f.write(" }\n") @@ -343,13 +343,21 @@ def gen_enter_callback(f, func, params, enum_map): f.write(" stream << \" " + name + " = \" << (params->p" + name + ")->data;\n") else: if type.find("char*") >= 0 and type.find("char*") == len(type) - len("char*"): - f.write(" if (*(params->p" + name + ") == nullptr) {\n") - f.write(" stream << \" " + name + " = \" << \"0\";\n") - f.write(" } else if (strlen(*(params->p" + name +")) == 0) {\n") - f.write(" stream << \" " + name + " = 
\\\"\\\"\";\n") - f.write(" } else {\n") - f.write(" stream << \" " + name + " = \\\"\" << *(params->p" + name + ") << \"\\\"\";\n") - f.write(" }\n") + if func == "zeModuleGetFunctionPointer" or func == "zeModuleGetGlobalPointer": + f.write(" if (*(params->p" + name + ") == nullptr) {\n") + f.write(" stream << \" " + name + " = \" << \"0\";\n") + f.write(" } else if (strlen(*(params->p" + name +")) == 0) {\n") + f.write(" stream << \" " + name + " = \\\"\\\"\";\n") + f.write(" } else {\n") + f.write(" stream << \" " + name + " = \\\"\" << *(params->p" + name + ") << \"\\\"\";\n") + f.write(" }\n") + else: + f.write(" if (*(params->p" + name + ") == nullptr) {\n") + f.write(" stream << \" " + name + " = \" << \"0\";\n") + f.write(" } else {\n") + f.write(" stream << \" " + name + " = \" <<\n") + f.write(" reinterpret_cast(*(params->p" + name + "));\n") + f.write(" }\n") else: f.write(" stream << \" " + name + " = \" << *(params->p" + name + ");\n") if name.find("Kernel") >= 0 and func == "zeCommandListAppendLaunchKernel": @@ -608,7 +616,7 @@ def gen_exit_callback(f, func, params, enum_map): else: f.write(" stream << \" " + name[1:] + " = \" << **(params->p" + name + ");\n") f.write(" }\n") - elif name.find("pptr") == 0 or name.find("pCount") == 0: + elif name.find("pptr") == 0 or name == "pCount" or name == "pSize": f.write(" if (*(params->p" + name + ") != nullptr) {\n") if type == "ze_ipc_mem_handle_t*" or type == "ze_ipc_event_pool_handle_t*": f.write(" stream << \" " + name[1:] + " = \" << (*(params->p" + name + "))->data;\n") @@ -619,6 +627,14 @@ def gen_exit_callback(f, func, params, enum_map): f.write(" if (*(params->p" + name + ") != nullptr) {\n") f.write(" stream << \" " + name + " = \" << **(params->p" + name + ");\n") f.write(" }\n") + elif name == "pName": + f.write(" if (*(params->p" + name + ") != nullptr) {\n") + f.write(" if (strlen(*(params->p" + name +")) == 0) {\n") + f.write(" stream << \" " + name[1:] + " = \\\"\\\"\";\n") + f.write(" } else 
{\n") + f.write(" stream << \" " + name[1:] + " = \\\"\" << *(params->p" + name + ") << \"\\\"\";\n") + f.write(" }\n") + f.write(" }\n") f.write(" }\n") f.write(" stream << \" -> \" << GetResultString(result) << \n") f.write(" \"(0x\" << result << \")\" << std::endl;\n") diff --git a/tools/ze_tracer/ze_kernel_collector.h b/tools/ze_tracer/ze_kernel_collector.h index e8f1ba1..4fb695f 100644 --- a/tools/ze_tracer/ze_kernel_collector.h +++ b/tools/ze_tracer/ze_kernel_collector.h @@ -941,7 +941,7 @@ class ZeKernelCollector { PTI_ASSERT(command->append_time <= call->submit_time); ++(command->call_count); call->call_id = command->call_count; - call->need_to_process = !correlator_->IsCollectionDisabled(); + call->need_to_process = correlator_->IsCollectionEnabled(); kernel_call_list_.push_back(call); correlator_->AddCallId(command_list, call->call_id); @@ -1189,7 +1189,7 @@ class ZeKernelCollector { call->device_submit_time = device_timestamp; call->queue = reinterpret_cast(command_list); PTI_ASSERT(collector->correlator_ != nullptr); - call->need_to_process = !collector->correlator_->IsCollectionDisabled(); + call->need_to_process = collector->correlator_->IsCollectionEnabled(); } *instance_data = static_cast(call);