ONNXRuntime backend (WIP) #341

Open · wants to merge 2 commits into master
3 changes: 3 additions & 0 deletions .gitignore
@@ -49,6 +49,9 @@ out.txt
# For clion IDE
.idea

# For vscode
.vscode

# For cmake
CMakeCache.txt
CMakeFiles/
122 changes: 103 additions & 19 deletions cpp/CMakeLists.txt
@@ -23,25 +23,9 @@ if(NOT WIN32)
set(ColorBoldRed "${ColorRed}${ColorBold}")
endif()

#--------------------------- CMAKE VARIABLES (partly for Cmake GUI) ----------------------------------------------------

set(BUILD_DISTRIBUTED 0 CACHE BOOL "Build with http support for contributing to distributed training")
set(USE_BACKEND CACHE STRING "Neural net backend")
string(TOUPPER "${USE_BACKEND}" USE_BACKEND)
set_property(CACHE USE_BACKEND PROPERTY STRINGS "" CUDA OPENCL EIGEN)

set(USE_TCMALLOC 0 CACHE BOOL "Use TCMalloc")
set(NO_GIT_REVISION 0 CACHE BOOL "Disable embedding the git revision into the compiled exe")
set(USE_AVX2 0 CACHE BOOL "Compile with AVX2")
set(USE_BIGGER_BOARDS_EXPENSIVE 0 CACHE BOOL "Allow boards up to size 29. Compiling with this Will use more memory and slow down KataGo, even when playing on boards of size 19.")

#--------------------------- NEURAL NET BACKEND ------------------------------------------------------------------------

message(STATUS "Building 'katago' executable for GTP engine and other tools.")
if(USE_BACKEND STREQUAL "CUDA")
message(STATUS "-DUSE_BACKEND=CUDA, using CUDA backend.")

#--------------------------- CUDA MACRO -------------------------------------------------------------------------------

macro(CONFIGURE_CUDA)
# Ensure dynamic cuda linking (Versions prior to 3.17)
if (${CMAKE_VERSION} VERSION_LESS "3.17")
set(CMAKE_CUDA_FLAGS "" CACHE STRING "")
@@ -146,6 +130,26 @@ if(USE_BACKEND STREQUAL "CUDA")
"
)
endif()
endmacro()

#--------------------------- CMAKE VARIABLES (partly for Cmake GUI) ----------------------------------------------------

set(BUILD_DISTRIBUTED 0 CACHE BOOL "Build with http support for contributing to distributed training")
set(USE_BACKEND CACHE STRING "Neural net backend")
string(TOUPPER "${USE_BACKEND}" USE_BACKEND)
set_property(CACHE USE_BACKEND PROPERTY STRINGS "" CUDA OPENCL EIGEN ONNXRUNTIME)

set(USE_TCMALLOC 0 CACHE BOOL "Use TCMalloc")
set(NO_GIT_REVISION 0 CACHE BOOL "Disable embedding the git revision into the compiled exe")
set(USE_AVX2 0 CACHE BOOL "Compile with AVX2")
set(USE_BIGGER_BOARDS_EXPENSIVE 0 CACHE BOOL "Allow boards up to size 29. Compiling with this Will use more memory and slow down KataGo, even when playing on boards of size 19.")

#--------------------------- NEURAL NET BACKEND ------------------------------------------------------------------------

message(STATUS "Building 'katago' executable for GTP engine and other tools.")
if(USE_BACKEND STREQUAL "CUDA")
message(STATUS "-DUSE_BACKEND=CUDA, using CUDA backend.")
configure_cuda()
elseif(USE_BACKEND STREQUAL "OPENCL")
message(STATUS "-DUSE_BACKEND=OPENCL, using OpenCL backend.")
set(NEURALNET_BACKEND_SOURCES
@@ -162,8 +166,28 @@ elseif(USE_BACKEND STREQUAL "EIGEN")
set(NEURALNET_BACKEND_SOURCES
neuralnet/eigenbackend.cpp
)
elseif(USE_BACKEND STREQUAL "ONNXRUNTIME")
message(STATUS "-DUSE_BACKEND=ONNXRUNTIME, using ONNXRuntime backend.")
set(ORT_CUDA 0 CACHE BOOL "Use CUDA execution provider for ONNXRuntime.")
set(ORT_TENSORRT 0 CACHE BOOL "Use TensorRT execution provider for ONNXRuntime.")
set(ORT_DIRECTML 0 CACHE BOOL "Use DirectML execution provider for ONNXRuntime.")
set(ORT_MIGRAPHX 0 CACHE BOOL "Use MIGraphX execution provider for ONNXRuntime.")
if(ORT_CUDA OR ORT_TENSORRT)
configure_cuda()
endif()
if(ORT_MIGRAPHX)
set(NEURALNET_BACKEND_SOURCES
neuralnet/ortbackend.cpp
neuralnet/openclhelpers.cpp
)
else()
set(NEURALNET_BACKEND_SOURCES
neuralnet/ortbackend.cpp
)
endif()

elseif(USE_BACKEND STREQUAL "")
message(WARNING "${ColorBoldRed}WARNING: Using dummy neural net backend, intended for non-neural-net testing only, will fail on any code path requiring a neural net. To use neural net, specify -DUSE_BACKEND=CUDA or -DUSE_BACKEND=OPENCL or -DUSE_BACKEND=EIGEN to compile with the respective backend.${ColorReset}")
message(WARNING "${ColorBoldRed}WARNING: Using dummy neural net backend, intended for non-neural-net testing only, will fail on any code path requiring a neural net. To use neural net, specify -DUSE_BACKEND=CUDA or -DUSE_BACKEND=OPENCL or -DUSE_BACKEND=ONNXRUNTIME or -DUSE_BACKEND=EIGEN to compile with the respective backend.${ColorReset}")
set(NEURALNET_BACKEND_SOURCES neuralnet/dummybackend.cpp)
else()
message(FATAL_ERROR "Unrecognized backend: " ${USE_BACKEND})
@@ -331,6 +355,66 @@ elseif(USE_BACKEND STREQUAL "EIGEN")
endif()
endif()
endif()
elseif(USE_BACKEND STREQUAL "ONNXRUNTIME")
target_compile_definitions(katago PRIVATE USE_ONNXRUNTIME_BACKEND)
set(ORT_LIB_DIR CACHE STRING "ONNXRuntime library location")
set(ORT_INCLUDE_DIR CACHE STRING "ONNXRuntime header files location")
message(STATUS "ORT_LIB_DIR: " ${ORT_LIB_DIR})
message(STATUS "ORT_INCLUDE_DIR: " ${ORT_INCLUDE_DIR})
include_directories(${ORT_INCLUDE_DIR})
if(EXISTS ${ORT_INCLUDE_DIR}/core/session)
include_directories(${ORT_INCLUDE_DIR}/core/session)
endif()
if(EXISTS ${ORT_INCLUDE_DIR}/core/providers/cpu)
include_directories(${ORT_INCLUDE_DIR}/core/providers/cpu)
endif()
find_library(ORT_LIBRARY NAMES onnxruntime PATHS ${ORT_LIB_DIR})
if(NOT ORT_LIBRARY)
message(FATAL_ERROR "Could not find onnxruntime")
endif()
target_link_libraries(katago ${ORT_LIBRARY})
if(ORT_CUDA)
target_compile_definitions(katago PRIVATE USE_ORT_CUDA)
endif()
if(ORT_TENSORRT)
target_compile_definitions(katago PRIVATE USE_ORT_TENSORRT)
set(TENSORRT_LIB_DIR CACHE STRING "TensorRT library location")
set(TENSORRT_INCLUDE_DIR CACHE STRING "TensorRT header file location")
include_directories(${TENSORRT_INCLUDE_DIR})
find_library(TENSORRT_LIBRARY NAMES nvinfer PATHS ${TENSORRT_LIB_DIR})
if(NOT TENSORRT_LIBRARY)
message(FATAL_ERROR "Could not find nvinfer")
endif()
target_link_libraries(katago ${TENSORRT_LIBRARY})
if(EXISTS ${ORT_INCLUDE_DIR}/core/providers/tensorrt)
include_directories(${ORT_INCLUDE_DIR}/core/providers/tensorrt)
endif()
endif()
if(ORT_CUDA OR ORT_TENSORRT)
find_package(CUDA REQUIRED)
find_path(CUDNN_INCLUDE_DIR cudnn.h HINTS ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES cuda/include include)
if((NOT CUDNN_INCLUDE_DIR))
message(ERROR "${ColorBoldRed} cudnn.h was NOT found, specify CUDNN_INCLUDE_DIR to indicate where it is. ${ColorReset}")
endif()
find_library(CUDNN_LIBRARY libcudnn.so PATHS /usr/local/cuda/lib64 /opt/cuda/lib64)
include_directories(SYSTEM ${CUDA_INCLUDE_DIRS} ${CUDNN_INCLUDE_DIR}) #SYSTEM is for suppressing some compiler warnings in thrust libraries
target_link_libraries(katago ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_LIBRARIES})
if(EXISTS ${ORT_INCLUDE_DIR}/core/providers/cuda)
include_directories(${ORT_INCLUDE_DIR}/core/providers/cuda)
endif()
endif()
if(ORT_DIRECTML)
target_compile_definitions(katago PRIVATE USE_ORT_DIRECTML)
if(EXISTS ${ORT_INCLUDE_DIR}/core/providers/directml)
include_directories(${ORT_INCLUDE_DIR}/core/providers/directml)
endif()
endif()
if(ORT_MIGRAPHX)
target_compile_definitions(katago PRIVATE USE_ORT_MIGRAPHX)
if(EXISTS ${ORT_INCLUDE_DIR}/core/providers/migraphx)
include_directories(${ORT_INCLUDE_DIR}/core/providers/migraphx)
endif()
endif()
endif()

if(USE_BIGGER_BOARDS_EXPENSIVE)
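
Note: given the options introduced above, a configure line for this backend might look like the following — a sketch only, with placeholder install paths, and with just ORT_CUDA shown (ORT_TENSORRT, ORT_DIRECTML, and ORT_MIGRAPHX are enabled the same way). Per the checks above, ORT_CUDA and ORT_TENSORRT additionally require CUDA and cuDNN to be found:

cmake . -DUSE_BACKEND=ONNXRUNTIME -DORT_CUDA=1 -DORT_LIB_DIR=/path/to/onnxruntime/lib -DORT_INCLUDE_DIR=/path/to/onnxruntime/include
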
45 changes: 45 additions & 0 deletions cpp/command/benchmark.cpp
@@ -209,6 +209,10 @@ int MainCmds::benchmark(int argc, const char* const* argv) {
cout << "If you have a strong GPU capable of FP16 tensor cores (e.g. RTX2080), "
<< "using the Cuda version of KataGo instead may give a mild performance boost." << endl;
#endif
#ifdef USE_ONNXRUNTIME_BACKEND
cout << "You are currently using the ONNXRuntime version of KataGo with "
<< nnEval->getOnnxRuntimeExecutionProvider() << " execution provider." << endl;
#endif
#ifdef USE_EIGEN_BACKEND
cout << "You are currently using the Eigen (CPU) version of KataGo. Due to having no GPU, it may be slow." << endl;
#endif
@@ -564,6 +568,7 @@ int MainCmds::genconfig(int argc, const char* const* argv, const char* firstComm
int64_t configMaxPlayouts = ((int64_t)1) << 50;
double configMaxTime = 1e20;
double configMaxPonderTime = -1.0;
string configOnnxRuntimeExecutionProvider;
vector<int> configDeviceIdxs;
int configNNCacheSizePowerOfTwo = 20;
int configNNMutexPoolSizePowerOfTwo = 16;
@@ -693,6 +698,41 @@ int MainCmds::genconfig(int argc, const char* const* argv, const char* firstComm
});
}

#ifdef USE_ONNXRUNTIME_BACKEND
cout << endl;
cout << "=========================================================================" << endl;
cout << "ONNXRUNTIME EXECUTION PROVIDER" << endl;

{
vector<string> executionProviders;
#ifdef USE_ORT_CUDA
executionProviders.push_back("CUDA");
#endif
#ifdef USE_ORT_TENSORRT
executionProviders.push_back("TensorRT");
#endif
#ifdef USE_ORT_DIRECTML
executionProviders.push_back("DirectML");
#endif
#ifdef USE_ORT_MIGRAPHX
executionProviders.push_back("MIGraphX");
#endif

cout << endl;
cout << "Available ONNXRuntime execution providers:" << endl;
for(const auto& provider: executionProviders) {
cout << provider << " ";
}
cout << endl << endl;

string prompt = "Specify an execution provider for ONNXRuntime. Leave blank to use the first available provider.\n";
promptAndParseInput(prompt, [&](const string& line) {
if(line == "") configOnnxRuntimeExecutionProvider = executionProviders[0];
else configOnnxRuntimeExecutionProvider = line;
});
}
#endif

cout << endl;
cout << "=========================================================================" << endl;
cout << "GPUS AND RAM" << endl;
@@ -701,7 +741,11 @@ int MainCmds::genconfig(int argc, const char* const* argv, const char* firstComm
{
cout << endl;
cout << "Finding available GPU-like devices..." << endl;
#ifndef USE_ONNXRUNTIME_BACKEND
NeuralNet::printDevices();
#else
NeuralNet::printDevices(configOnnxRuntimeExecutionProvider);
#endif
cout << endl;

string prompt =
@@ -789,6 +833,7 @@ int MainCmds::genconfig(int argc, const char* const* argv, const char* firstComm
configMaxPlayouts,
configMaxTime,
configMaxPonderTime,
configOnnxRuntimeExecutionProvider,
configDeviceIdxs,
configNNCacheSizePowerOfTwo,
configNNMutexPoolSizePowerOfTwo,
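
Note: for a build configured with, say, ORT_CUDA and ORT_TENSORRT, the new genconfig section above would prompt roughly as follows (illustrative output, reconstructed from the code shown in this hunk):

=========================================================================
ONNXRUNTIME EXECUTION PROVIDER

Available ONNXRuntime execution providers:
CUDA TensorRT

Specify an execution provider for ONNXRuntime. Leave blank to use the first available provider.
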
4 changes: 4 additions & 0 deletions cpp/neuralnet/cudabackend.cpp
@@ -2584,6 +2584,8 @@ ComputeContext* NeuralNet::createComputeContext(
int nnXLen,
int nnYLen,
const string& openCLTunerFile,
const string& onnxOptModelFile,
const string& onnxRuntimeExecutionProvider,
const string& homeDataDirOverride,
bool openCLReTunePerBoardSize,
enabled_t useFP16Mode,
@@ -2593,6 +2595,8 @@
(void)gpuIdxs;
(void)logger;
(void)openCLTunerFile;
(void)onnxOptModelFile;
(void)onnxRuntimeExecutionProvider;
(void)homeDataDirOverride;
(void)openCLReTunePerBoardSize;
(void)loadedModel;
4 changes: 4 additions & 0 deletions cpp/neuralnet/dummybackend.cpp
@@ -19,6 +19,8 @@ ComputeContext* NeuralNet::createComputeContext(
int nnXLen,
int nnYLen,
const string& openCLTunerFile,
const string& onnxOptModelFile,
const string& onnxRuntimeExecutionProvider,
const string& homeDataDirOverride,
bool openCLReTunePerBoardSize,
enabled_t useFP16Mode,
@@ -30,6 +32,8 @@
(void)nnXLen;
(void)nnYLen;
(void)openCLTunerFile;
(void)onnxOptModelFile;
(void)onnxRuntimeExecutionProvider;
(void)homeDataDirOverride;
(void)openCLReTunePerBoardSize;
(void)useFP16Mode;
4 changes: 4 additions & 0 deletions cpp/neuralnet/eigenbackend.cpp
@@ -1429,6 +1429,8 @@ ComputeContext* NeuralNet::createComputeContext(
int nnXLen,
int nnYLen,
const string& openCLTunerFile,
const string& onnxOptModelFile,
const string& onnxRuntimeExecutionProvider,
const string& homeDataDirOverride,
bool openCLReTunePerBoardSize,
enabled_t useFP16Mode,
@@ -1438,6 +1440,8 @@
(void)gpuIdxs;
(void)logger;
(void)openCLTunerFile;
(void)onnxOptModelFile;
(void)onnxRuntimeExecutionProvider;
(void)homeDataDirOverride;
(void)openCLReTunePerBoardSize;

10 changes: 8 additions & 2 deletions cpp/neuralnet/nneval.cpp
@@ -66,6 +66,8 @@ NNEvaluator::NNEvaluator(
int nnMutexPoolSizePowerofTwo,
bool skipNeuralNet,
const string& openCLTunerFile,
const string& onnxOptModelFile,
const string& onnxRuntimeExecutionProvider,
const string& homeDataDirOverride,
bool openCLReTunePerBoardSize,
enabled_t useFP16Mode,
@@ -83,6 +85,7 @@
requireExactNNLen(rExactNNLen),
policySize(NNPos::getPolicySize(xLen,yLen)),
inputsUseNHWC(iUseNHWC),
ortExecutionProvider(onnxRuntimeExecutionProvider),
usingFP16Mode(useFP16Mode),
usingNHWCMode(useNHWCMode),
numThreads(numThr),
@@ -145,8 +148,8 @@
inputsVersion = NNModelVersion::getInputsVersion(modelVersion);
computeContext = NeuralNet::createComputeContext(
gpuIdxs,logger,nnXLen,nnYLen,
openCLTunerFile,homeDataDirOverride,openCLReTunePerBoardSize,
usingFP16Mode,usingNHWCMode,loadedModel
openCLTunerFile,onnxOptModelFile,onnxRuntimeExecutionProvider,
homeDataDirOverride,openCLReTunePerBoardSize,usingFP16Mode,usingNHWCMode,loadedModel
);
}
else {
@@ -224,6 +227,9 @@ int NNEvaluator::getNNXLen() const {
int NNEvaluator::getNNYLen() const {
return nnYLen;
}
string NNEvaluator::getOnnxRuntimeExecutionProvider() const {
return ortExecutionProvider;
}
enabled_t NNEvaluator::getUsingFP16Mode() const {
return usingFP16Mode;
}
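
Note: neither nneval.cpp nor this page's diff shows the ONNXRuntime backend's own ComputeContext, so the following is only a guess at the state the two new arguments feed into; every name here is hypothetical (in ONNXRuntime terms, an "optimized model file" is the path a session may save its graph-optimized model to):

#include <string>

// Hypothetical sketch of the ONNX backend's compute context; not from this diff.
struct OrtComputeContext {
  int nnXLen;
  int nnYLen;
  std::string optModelFile;       // assumed: where the optimized ONNX model is cached
  std::string executionProvider;  // "CUDA", "TensorRT", "DirectML", or "MIGraphX"
};
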
4 changes: 4 additions & 0 deletions cpp/neuralnet/nneval.h
@@ -89,6 +89,8 @@ class NNEvaluator {
int nnMutexPoolSizePowerofTwo,
bool debugSkipNeuralNet,
const std::string& openCLTunerFile,
const std::string& onnxOptModelFile,
const std::string& onnxRuntimeExecutionProvider,
const std::string& homeDataDirOverride,
bool openCLReTunePerBoardSize,
enabled_t useFP16Mode,
@@ -113,6 +115,7 @@
int getNumServerThreads() const;
int getNNXLen() const;
int getNNYLen() const;
std::string getOnnxRuntimeExecutionProvider() const;
enabled_t getUsingFP16Mode() const;
enabled_t getUsingNHWCMode() const;

@@ -172,6 +175,7 @@
const bool requireExactNNLen;
const int policySize;
const bool inputsUseNHWC;
const std::string ortExecutionProvider;
const enabled_t usingFP16Mode;
const enabled_t usingNHWCMode;
int numThreads;
12 changes: 12 additions & 0 deletions cpp/neuralnet/nninterface.h
@@ -36,6 +36,16 @@ namespace NeuralNet {

//Print available backend devices
void printDevices();
void printDevices(const std::string& ortExecutionProvider);
#if defined(USE_ORT_CUDA) || defined(USE_ORT_TENSORRT)
void printCUDADevices();
#endif
#ifdef USE_ORT_DIRECTML
void printDirectMLDevices();
#endif
#ifdef USE_ORT_MIGRAPHX
void printOpenCLDevices();
#endif

// Model I/O -----------------------------------------------------------------

@@ -59,6 +69,8 @@
int nnXLen,
int nnYLen,
const std::string& openCLTunerFile,
const std::string& onnxOptModelFile,
const std::string& onnxRuntimeExecutionProvider,
const std::string& homeDataDirOverride,
bool openCLReTunePerBoardSize,
enabled_t useFP16Mode,
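
Note: ortbackend.cpp is not included in this page's diff, so the following is only a sketch of how the new printDevices overload could dispatch to the provider-specific helpers declared above; the exact matching strings and the CPU fallback message are assumptions:

#include <iostream>
#include <string>
#include "neuralnet/nninterface.h"

// Sketch: route device listing to the helper matching the chosen
// execution provider. Helper names come from the declarations above.
void NeuralNet::printDevices(const std::string& ortExecutionProvider) {
#if defined(USE_ORT_CUDA) || defined(USE_ORT_TENSORRT)
  if(ortExecutionProvider == "CUDA" || ortExecutionProvider == "TensorRT") {
    printCUDADevices();
    return;
  }
#endif
#ifdef USE_ORT_DIRECTML
  if(ortExecutionProvider == "DirectML") {
    printDirectMLDevices();
    return;
  }
#endif
#ifdef USE_ORT_MIGRAPHX
  if(ortExecutionProvider == "MIGraphX") {
    // MIGraphX targets AMD GPUs, which these helpers enumerate via OpenCL.
    printOpenCLDevices();
    return;
  }
#endif
  std::cout << "No GPU execution provider selected; running on CPU." << std::endl;
}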