diff --git a/Compiling.md b/Compiling.md index 50232864c..467db4f8f 100644 --- a/Compiling.md +++ b/Compiling.md @@ -17,14 +17,17 @@ As also mentioned in the instructions below but repeated here for visibility, if * If using the CUDA backend, CUDA 10.2 with CUDNN 7.6.5, or CUDA 11.1 with CUDNN 8.0.4 (https://developer.nvidia.com/cuda-toolkit) (https://developer.nvidia.com/cudnn) and a GPU capable of supporting them. I'm unsure how version compatibility works with CUDA, there's a good chance that later versions than these work just as well, but they have not been tested. * If using the Eigen backend, Eigen3. With Debian packages, (i.e. apt or apt-get), this should be `libeigen3-dev`. * zlib, libzip. With Debian packages (i.e. apt or apt-get), these should be `zlib1g-dev`, `libzip-dev`. - * If you want to do self-play training and research, probably Google perftools `libgoogle-perftools-dev` for TCMalloc or some other better malloc implementation. For unknown reasons, the allocation pattern in self-play with large numbers of threads and parallel games causes a lot of memory fragmentation under glibc malloc that will eventually run your machine out of memory, but better mallocs handle it fine. + * If you want to do self-play training and research, a better malloc implementation. For unknown reasons, the allocation pattern in self-play with large numbers of threads and parallel games causes a lot of memory fragmentation under glibc malloc that will eventually run your machine out of memory, but better mallocs handle it fine. + * You can install the Google perftools package `libgoogle-perftools-dev` for TCMalloc. + * You can manually build [mimalloc](https://github.com/microsoft/mimalloc) for an alternative memory allocator as well. * If compiling to contribute to public distributed training runs, OpenSSL is required (`libssl-dev`). 
* Clone this repo: * `git clone https://github.com/lightvector/KataGo.git` * Compile using CMake and make in the cpp directory: * `cd KataGo/cpp` * `cmake . -DUSE_BACKEND=OPENCL` or `cmake . -DUSE_BACKEND=CUDA` or `cmake . -DUSE_BACKEND=EIGEN` depending on which backend you want. - * Specify also `-DUSE_TCMALLOC=1` if using TCMalloc. + * Specify also `-DUSE_TCMALLOC=1` if using TCMalloc, or `-DUSE_MIMALLOC=1` if using mimalloc. + * Note that you should set `LD_LIBRARY_PATH` with something like `export LD_LIBRARY_PATH=/usr/local/lib/mimalloc-1.7/:$LD_LIBRARY_PATH` (preferably in ~/.bashrc or any corresponding shell config) before running KataGo if you are using mimalloc, for the time being. * Compiling will also call git commands to embed the git hash into the compiled executable, specify also `-DNO_GIT_REVISION=1` to disable it if this is causing issues for you. * Specify `-DUSE_AVX2=1` to also compile Eigen with AVX2 and FMA support, which will make it incompatible with old CPUs but much faster. (If you want to go further, you can also add `-DCMAKE_CXX_FLAGS='-march=native'` which will specialize to precisely your machine's CPU, but the exe might not run on other machines at all). * Specify `-DBUILD_DISTRIBUTED=1` to compile with support for contributing data to public distributed training runs. @@ -46,6 +49,7 @@ As also mentioned in the instructions below but repeated here for visibility, if * zlib. The following package might work, https://www.nuget.org/packages/zlib-vc140-static-64/, or alternatively you can build it yourself via something like: https://github.com/kiyolee/zlib-win-build * libzip (optional, needed only for self-play training) - for example https://github.com/kiyolee/libzip-win-build * If compiling to contribute to public distributed training runs, OpenSSL is required (https://www.openssl.org/, https://wiki.openssl.org/index.php/Compilation_and_Installation). 
+ * Optionally, you can manually build [mimalloc](https://github.com/microsoft/mimalloc) for an alternative memory allocator, following the instructions in its GitHub repository. * Download/clone this repo to some folder `KataGo`. * Configure using CMake GUI and compile in MSVC: * Select `KataGo/cpp` as the source code directory in [CMake GUI](https://cmake.org/runningcmake/). @@ -62,6 +66,7 @@ As also mentioned in the instructions below but repeated here for visibility, if * `BUILD_DISTRIBUTED` to compile with support for contributing data to public distributed training runs. * If building distributed, you will also need to build with Git revision support, including building within a clone of the repo, as opposed to merely an unzipped copy of its source. * Only builds from specific tagged versions or branches can contribute, in particular, instead of the `master` branch, use either the latest release tag (e.g. v1.8.0) or the tip of the `stable` branch. To minimize the chance of any data incompatibilities or bugs, please do NOT attempt to contribute with custom changes or circumvent these limitations. + * `USE_MIMALLOC` if using mimalloc. You may also want to set `MIMALLOC_PATH` to the directory containing the library files. * Once running "Configure" looks good, run "Generate" and then open MSVC and build as normal in MSVC. * Done! You should now have a compiled `katago.exe` executable in your working directory. * Note: You may need to copy the ".dll" files corresponding to the various ".lib" files you compiled with into the directory containing katago.exe. 
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 51a859bb4..718d02ded 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -36,6 +36,7 @@ string(TOUPPER "${USE_BACKEND}" USE_BACKEND)
 set_property(CACHE USE_BACKEND PROPERTY STRINGS "" CUDA OPENCL EIGEN)
 
 set(USE_TCMALLOC 0 CACHE BOOL "Use TCMalloc")
+set(USE_MIMALLOC 0 CACHE BOOL "Use mimalloc")
 set(NO_GIT_REVISION 0 CACHE BOOL "Disable embedding the git revision into the compiled exe")
 set(USE_AVX2 0 CACHE BOOL "Compile with AVX2")
 set(USE_BIGGER_BOARDS_EXPENSIVE 0 CACHE BOOL "Allow boards up to size 29. Compiling with this will use more memory and slow down KataGo, even when playing on boards of size 19.")
@@ -188,6 +189,28 @@ endif()
 # set (Gperftools_DIR "${CMAKE_CURRENT_LIST_DIR}/cmake/")
 # find_package(Gperftools REQUIRED)
 
+#--------------------------- MIMALLOC ----------------------------------------------------------------------------------
+
+# Optionally link mimalloc as the allocator. MIMALLOC_PATH may name the directory
+# containing the library; it is searched before the system default locations.
+if(USE_MIMALLOC)
+  # Linking two replacement allocators into the same binary is not supported.
+  if(USE_TCMALLOC)
+    message(FATAL_ERROR "USE_TCMALLOC and USE_MIMALLOC are mutually exclusive, please enable only one of them")
+  endif()
+  message(STATUS "-DUSE_MIMALLOC=1 is set, using mimalloc as the allocator")
+  set(MIMALLOC_PATH "" CACHE PATH "mimalloc library location")
+  if(MSVC)
+    # MSVC builds look for the mimalloc-override library rather than plain mimalloc.
+    find_library(MIMALLOC_LIB mimalloc-override HINTS ${MIMALLOC_PATH})
+  else()
+    find_library(MIMALLOC_LIB mimalloc HINTS ${MIMALLOC_PATH} /usr)
+  endif()
+  if(NOT MIMALLOC_LIB)
+    message(FATAL_ERROR "Could not find mimalloc, try setting MIMALLOC_PATH to the directory containing the library")
+  endif()
+endif()
+
 #--------------------------- GIT ---------------------------------------------------------------------------------------
 
 if(NO_GIT_REVISION AND (NOT BUILD_DISTRIBUTED))
@@ -391,6 +414,10 @@ if(USE_TCMALLOC)
   target_link_libraries(katago ${TCMALLOC_LIB})
 endif(USE_TCMALLOC)
 
+if(USE_MIMALLOC)
+  target_link_libraries(katago ${MIMALLOC_LIB})
+endif(USE_MIMALLOC)
+
 if(BUILD_DISTRIBUTED)
   message(STATUS "-DBUILD_DISTRIBUTED=1 is set, compiling code and dependencies to contribute to distributed training")
   target_compile_definitions(katago PRIVATE BUILD_DISTRIBUTED)