Skip to content

Commit

Permalink
Restructure PTI repo, improve testing
Browse files Browse the repository at this point in the history
  • Loading branch information
anton-v-gorshkov committed Apr 16, 2021
1 parent df968c9 commit 7bad721
Show file tree
Hide file tree
Showing 102 changed files with 737 additions and 479 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.21.0
0.22.0
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
6 changes: 5 additions & 1 deletion chapters/device_activity_tracing/LevelZero.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,4 +138,8 @@ Event pool profiling does not require any additional environment variables to be

## Samples
- [Level Zero GEMM](../../samples/ze_gemm)
- [Level Zero Hot Kernels](../../samples/ze_hot_kernels)
- [Level Zero Hot Kernels](../../samples/ze_hot_kernels)

## Tools
- [Level Zero Tracer](../../tools/ze_tracer)
- [Tracing and Profiling Tool for Data Parallel C++ (DPC++)](../../tools/onetrace)
6 changes: 5 additions & 1 deletion chapters/runtime_api_tracing/LevelZero.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,8 @@ ZE_ENABLE_TRACING_LAYER=1 ./<application>
- [Level Zero Hot Functions](../../samples/ze_hot_functions)
- [Level Zero Hot Kernels](../../samples/ze_hot_kernels)
- [Level Zero Debug Info](../../samples/ze_debug_info)
- [Level Zero Metric Query](../../samples/ze_metric_query)
- [Level Zero Metric Query](../../samples/ze_metric_query)
## Tools
- [Level Zero Tracer](../../tools/ze_tracer)
- [Tracing and Profiling Tool for Data Parallel C++ (DPC++)](../../tools/onetrace)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
10 changes: 5 additions & 5 deletions samples/cl_debug_info/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include("../build_utils/CMakeLists.txt")
include("../../build_utils/CMakeLists.txt")
SetRequiredCMakeVersion()
cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION})

Expand All @@ -8,9 +8,9 @@ SetBuildType()

# Tool Library

add_library(clt_debug_info SHARED "${PROJECT_SOURCE_DIR}/../loader/init.cc" tool.cc)
add_library(clt_debug_info SHARED "${PROJECT_SOURCE_DIR}/../../loader/init.cc" tool.cc)
target_include_directories(clt_debug_info
PRIVATE "${PROJECT_SOURCE_DIR}/../utils")
PRIVATE "${PROJECT_SOURCE_DIR}/../../utils")
if(CMAKE_INCLUDE_PATH)
target_include_directories(clt_debug_info
PUBLIC "${CMAKE_INCLUDE_PATH}")
Expand All @@ -30,9 +30,9 @@ GetGmmHeaders(clt_debug_info)
# Loader

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=clt_debug_info")
add_executable(cl_debug_info "${PROJECT_SOURCE_DIR}/../loader/loader.cc")
add_executable(cl_debug_info "${PROJECT_SOURCE_DIR}/../../loader/loader.cc")
target_include_directories(cl_debug_info
PRIVATE "${PROJECT_SOURCE_DIR}/../utils")
PRIVATE "${PROJECT_SOURCE_DIR}/../../utils")
if(UNIX)
target_link_libraries(cl_debug_info
dl)
Expand Down
4 changes: 2 additions & 2 deletions samples/cl_gemm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include("../build_utils/CMakeLists.txt")
include("../../build_utils/CMakeLists.txt")
SetRequiredCMakeVersion()
cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION})

Expand All @@ -8,7 +8,7 @@ SetBuildType()

add_executable(cl_gemm main.cc)
target_include_directories(cl_gemm
PRIVATE "${PROJECT_SOURCE_DIR}/../utils")
PRIVATE "${PROJECT_SOURCE_DIR}/../../utils")
if(CMAKE_INCLUDE_PATH)
target_include_directories(cl_gemm
PUBLIC "${CMAKE_INCLUDE_PATH}")
Expand Down
4 changes: 2 additions & 2 deletions samples/cl_gemm_inst/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include("../build_utils/CMakeLists.txt")
include("../../build_utils/CMakeLists.txt")
SetRequiredCMakeVersion()
cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION})

Expand All @@ -8,7 +8,7 @@ SetBuildType()

add_executable(cl_gemm_inst main.cc)
target_include_directories(cl_gemm_inst
PRIVATE "${PROJECT_SOURCE_DIR}/../utils")
PRIVATE "${PROJECT_SOURCE_DIR}/../../utils")
if(CMAKE_INCLUDE_PATH)
target_include_directories(cl_gemm_inst
PUBLIC "${CMAKE_INCLUDE_PATH}")
Expand Down
4 changes: 2 additions & 2 deletions samples/cl_gemm_itt/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include("../build_utils/CMakeLists.txt")
include("../../build_utils/CMakeLists.txt")
SetRequiredCMakeVersion()
cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION})

Expand All @@ -8,7 +8,7 @@ SetBuildType()

add_executable(cl_gemm_itt main.cc ittnotify.cc)
target_include_directories(cl_gemm_itt
PRIVATE "${PROJECT_SOURCE_DIR}/../utils")
PRIVATE "${PROJECT_SOURCE_DIR}/../../utils")
if(CMAKE_INCLUDE_PATH)
target_include_directories(cl_gemm_itt
PUBLIC "${CMAKE_INCLUDE_PATH}")
Expand Down
10 changes: 5 additions & 5 deletions samples/cl_gpu_metrics/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include("../build_utils/CMakeLists.txt")
include("../../build_utils/CMakeLists.txt")
SetRequiredCMakeVersion()
cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION})

Expand All @@ -9,10 +9,10 @@ SetBuildType()
# Tool Library

add_library(clt_gpu_metrics SHARED
"${PROJECT_SOURCE_DIR}/../loader/init.cc"
"${PROJECT_SOURCE_DIR}/../../loader/init.cc"
tool.cc)
target_include_directories(clt_gpu_metrics
PRIVATE "${PROJECT_SOURCE_DIR}/../utils"
PRIVATE "${PROJECT_SOURCE_DIR}/../../utils"
PRIVATE "${PROJECT_SOURCE_DIR}/../cl_hot_kernels")
if(CMAKE_INCLUDE_PATH)
target_include_directories(clt_gpu_metrics
Expand All @@ -34,9 +34,9 @@ CheckForMDLibrary()
# Loader

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=clt_gpu_metrics")
add_executable(cl_gpu_metrics "${PROJECT_SOURCE_DIR}/../loader/loader.cc")
add_executable(cl_gpu_metrics "${PROJECT_SOURCE_DIR}/../../loader/loader.cc")
target_include_directories(cl_gpu_metrics
PRIVATE "${PROJECT_SOURCE_DIR}/../utils")
PRIVATE "${PROJECT_SOURCE_DIR}/../../utils")
if(UNIX)
target_link_libraries(cl_gpu_metrics
dl)
Expand Down
10 changes: 5 additions & 5 deletions samples/cl_hot_functions/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include("../build_utils/CMakeLists.txt")
include("../../build_utils/CMakeLists.txt")
SetRequiredCMakeVersion()
cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION})

Expand All @@ -9,10 +9,10 @@ SetBuildType()
# Tool Library

add_library(clt_hot_functions SHARED
"${PROJECT_SOURCE_DIR}/../loader/init.cc"
"${PROJECT_SOURCE_DIR}/../../loader/init.cc"
tool.cc)
target_include_directories(clt_hot_functions
PRIVATE "${PROJECT_SOURCE_DIR}/../utils")
PRIVATE "${PROJECT_SOURCE_DIR}/../../utils")
if(CMAKE_INCLUDE_PATH)
target_include_directories(clt_hot_functions
PUBLIC "${CMAKE_INCLUDE_PATH}")
Expand All @@ -26,9 +26,9 @@ GetOpenCLTracingHeaders(clt_hot_functions)
# Loader

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTOOL_NAME=clt_hot_functions")
add_executable(cl_hot_functions "${PROJECT_SOURCE_DIR}/../loader/loader.cc")
add_executable(cl_hot_functions "${PROJECT_SOURCE_DIR}/../../loader/loader.cc")
target_include_directories(cl_hot_functions
PRIVATE "${PROJECT_SOURCE_DIR}/../utils")
PRIVATE "${PROJECT_SOURCE_DIR}/../../utils")
if(UNIX)
target_link_libraries(cl_hot_functions
dl)
Expand Down
66 changes: 41 additions & 25 deletions samples/cl_hot_functions/README.md
Original file line number Diff line number Diff line change
@@ -1,30 +1,43 @@
# OpenCL(TM) Hot Functions
## Overview
This is a simple LD_PRELOAD based tool that allows to collect all called OpenCL(TM) API functions within an application along with their total execution time and call count for GPU device.
This is a simple LD_PRELOAD-based tool that collects all OpenCL(TM) API functions called within an application, along with their total execution time and call count.

As a result, a table like the following will be printed.
```
Function, Calls, Time (ns), Average (ns)
clBuildProgram, 1, 183549198, 183549198
clCreateBuffer, 12, 108285, 9023
clCreateCommandQueueWithProperties, 1, 1265, 1265
clCreateContext, 1, 9322, 9322
clCreateKernel, 1, 3428, 3428
clCreateProgramWithSource, 1, 3219, 3219
clEnqueueNDRangeKernel, 4, 2237845, 559461
clEnqueueReadBuffer, 4, 2358133, 589533
clEnqueueWriteBuffer, 8, 5719781, 714972
clFinish, 4, 174064236, 43516059
clGetDeviceIDs, 2, 362, 181
clGetDeviceInfo, 2, 354, 177
clGetEventProfilingInfo, 8, 14198, 1774
clGetKernelInfo, 4, 2411, 602
clReleaseCommandQueue, 1, 1046, 1046
clReleaseContext, 1, 173, 173
clReleaseKernel, 1, 2741, 2741
clReleaseMemObject, 12, 110922, 9243
clReleaseProgram, 1, 11561, 11561
clSetKernelArg, 16, 75282, 4705
=== API Timing Results: ===
Total Execution Time (ns): 363687486
Total API Time for CPU backend (ns): 524
Total API Time for GPU backend (ns): 355355363
== CPU Backend: ==
Function, Calls, Time (ns), Time (%), Average (ns), Min (ns), Max (ns)
clGetDeviceIDs, 1, 524, 100.00, 524, 524, 524
== GPU Backend: ==
Function, Calls, Time (ns), Time (%), Average (ns), Min (ns), Max (ns)
clBuildProgram, 1, 173888026, 48.93, 173888026, 173888026, 173888026
clFinish, 4, 172908147, 48.66, 43227036, 42711785, 44318785
clEnqueueWriteBuffer, 8, 4636256, 1.30, 579532, 207825, 1864890
clEnqueueReadBuffer, 4, 2051244, 0.58, 512811, 498662, 542971
clEnqueueNDRangeKernel, 4, 1623139, 0.46, 405784, 236120, 609050
clReleaseMemObject, 12, 95182, 0.03, 7931, 3525, 16436
clCreateBuffer, 12, 81056, 0.02, 6754, 2511, 16990
clSetKernelArg, 16, 24515, 0.01, 1532, 141, 7038
clGetEventProfilingInfo, 8, 13139, 0.00, 1642, 103, 3288
clCreateContext, 1, 12680, 0.00, 12680, 12680, 12680
clReleaseProgram, 1, 9503, 0.00, 9503, 9503, 9503
clCreateProgramWithSource, 1, 3880, 0.00, 3880, 3880, 3880
clCreateKernel, 1, 2941, 0.00, 2941, 2941, 2941
clReleaseKernel, 1, 1679, 0.00, 1679, 1679, 1679
clGetKernelInfo, 4, 1617, 0.00, 404, 190, 552
clCreateCommandQueueWithProperties, 1, 1388, 0.00, 1388, 1388, 1388
clGetDeviceIDs, 2, 311, 0.00, 155, 138, 173
clReleaseCommandQueue, 1, 270, 0.00, 270, 270, 270
clGetDeviceInfo, 2, 227, 0.00, 113, 103, 124
clReleaseContext, 1, 163, 0.00, 163, 163, 163
```
## Supported OS
- Linux
Expand All @@ -35,7 +48,8 @@ clCreateCommandQueueWithProperties, 1, 1265,
- [Git](https://git-scm.com/) (version 1.8 and above)
- [Python](https://www.python.org/) (version 2.7 and above)
- [OpenCL(TM) ICD Loader](https://github.com/KhronosGroup/OpenCL-ICD-Loader)
- [Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver](https://github.com/intel/compute-runtime)
- [Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver](https://github.com/intel/compute-runtime) to run on GPU
- [Intel(R) Xeon(R) Processor / Intel(R) Core(TM) Processor (CPU) Runtimes](https://software.intel.com/en-us/articles/opencl-drivers#cpu-section) to run on CPU

## Build and Run
### Linux
Expand All @@ -51,9 +65,10 @@ Use this command line to run the tool:
```sh
./cl_hot_functions <target_application>
```
One may use [cl_gemm](../cl_gemm) as target application:
One may use [cl_gemm](../cl_gemm) or [dpc_gemm](../dpc_gemm) as the target application:
```sh
./cl_hot_functions ../../cl_gemm/build/cl_gemm
./cl_hot_functions ../../dpc_gemm/build/dpc_gemm cpu
```
### Windows
Use Microsoft* Visual Studio x64 command prompt to run the following commands and build the sample:
Expand All @@ -68,7 +83,8 @@ Use this command line to run the tool:
```sh
cl_hot_functions.exe <target_application>
```
One may use [cl_gemm](../cl_gemm) as target application:
One may use [cl_gemm](../cl_gemm) or [dpc_gemm](../dpc_gemm) as the target application:
```sh
cl_hot_functions.exe ..\..\cl_gemm\build\cl_gemm.exe
cl_hot_functions.exe ..\..\dpc_gemm\build\dpc_gemm.exe cpu
```
Loading

0 comments on commit 7bad721

Please sign in to comment.