lightvector
diff --git a/‎Compiling.md‎
Lines changed: 2 additions & 1 deletion b/‎Compiling.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎cpp/CMakeLists.txt‎
Lines changed: 51 additions & 2 deletions b/‎cpp/CMakeLists.txt‎
Lines changed: 51 additions & 2 deletions
diff --git a/‎cpp/command/benchmark.cpp‎
Lines changed: 3 additions & 0 deletions b/‎cpp/command/benchmark.cpp‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎cpp/configs/analysis_example.cfg‎
Lines changed: 12 additions & 0 deletions b/‎cpp/configs/analysis_example.cfg‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎cpp/configs/contribute_example.cfg‎
Lines changed: 12 additions & 0 deletions b/‎cpp/configs/contribute_example.cfg‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎cpp/configs/gtp_example.cfg‎
Lines changed: 12 additions & 0 deletions b/‎cpp/configs/gtp_example.cfg‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎cpp/configs/match_example.cfg‎
Lines changed: 12 additions & 0 deletions b/‎cpp/configs/match_example.cfg‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎cpp/main.cpp‎
Lines changed: 4 additions & 0 deletions b/‎cpp/main.cpp‎
Lines changed: 4 additions & 0 deletions
@@ -133,14 +133,15 @@ As also mentioned in the instructions below but repeated here for visibility, if
       * AppleClang and Swift compilers: `xcode-select --install`.
       * If using the Metal backend, [Ninja](https://ninja-build.org): `brew install ninja`
       * If using the Metal backend, protobuf and abseil: `brew install protobuf abseil`
+      * If using the MLX backend (Apple Silicon only): `brew install mlx` (≥0.18). Requires CMake ≥3.27. KataGo finds MLX via CMake's default search (Homebrew installs it at `/opt/homebrew/share/cmake/MLX/`); override with `-DMLX_ROOT=/path/to/mlx/cmake` if needed.
       * libzip: `brew install libzip`.
       * If you want to do self-play training and research, probably Google perftools `brew install gperftools` for TCMalloc or some other better malloc implementation. For unknown reasons, the allocation pattern in self-play with large numbers of threads and parallel games causes a lot of memory fragmentation under glibc malloc that will eventually run your machine out of memory, but better mallocs handle it fine.
       * If compiling to contribute to public distributed training runs, OpenSSL is required (`brew install openssl`).
    * Clone this repo:
       * `git clone https://github.com/lightvector/KataGo.git`
    * Compile using CMake and make in the cpp directory:
       * `cd KataGo/cpp`
-      * `cmake . -G Ninja -DUSE_BACKEND=METAL` or `cmake . -DUSE_BACKEND=OPENCL` or `cmake . -DUSE_BACKEND=EIGEN` depending on which backend you want.
+      * `cmake . -G Ninja -DUSE_BACKEND=METAL` or `cmake . -DUSE_BACKEND=MLX` or `cmake . -DUSE_BACKEND=OPENCL` or `cmake . -DUSE_BACKEND=EIGEN` depending on which backend you want.
          * Specify also `-DUSE_TCMALLOC=1` if using TCMalloc.
          * Compiling will also call git commands to embed the git hash into the compiled executable, specify also `-DNO_GIT_REVISION=1` to disable it if this is causing issues for you.
          * Specify `-DUSE_AVX2=1` to also compile Eigen with AVX2 and FMA support, which will make it incompatible with old CPUs but much faster. Intel-based Macs with new processors support AVX2, but Apple Silicon Macs do not support AVX2 natively. (If you want to go further, you can also add `-DCMAKE_CXX_FLAGS='-march=native'` which will specialize to precisely your machine's CPU, but the exe might not run on other machines at all).
 
@@ -1,4 +1,23 @@
 cmake_minimum_required(VERSION 3.18.2)
+
+# Pre-project MLX setup. KataGo's MLX path enforces CMake 3.27 via the guard
+# below. MLX itself requires only CMake 3.25; 3.27 is chosen to match the
+# cmake_policy(VERSION 3.27) call. The global cmake_minimum_required stays at
+# 3.18.2 so non-MLX backends keep building on older CMake.
+#
+# The OSX deployment target is deliberately NOT pinned here. KataGo links
+# Homebrew's prebuilt libmlx.dylib, whose minos reflects the macOS it was
+# bottled on - that dylib, not this build, sets the real minimum macOS.
+# Pinning a lower value only stamps a misleading minos on the executable and
+# triggers a "linking with dylib built for newer version" linker warning;
+# letting CMake default the target to the build host keeps minos honest.
+# Pre-project guard for the MLX backend: fail early on a too-old CMake and
+# opt in to the 3.27 policy set before project() runs.
+#
+# USE_BACKEND is only upper-cased further down in this file (after project()),
+# so compare a locally normalized copy here. Otherwise -DUSE_BACKEND=mlx would
+# be accepted as MLX later on while silently skipping this version check and
+# the policy bump.
+string(TOUPPER "${USE_BACKEND}" katago_preproject_backend)
+if("${katago_preproject_backend}" STREQUAL "MLX")
+  if(CMAKE_VERSION VERSION_LESS 3.27)
+    message(FATAL_ERROR "KataGo's USE_BACKEND=MLX path requires CMake 3.27 or newer. You have ${CMAKE_VERSION}. Install via: brew install cmake")
+  endif()
+  cmake_policy(VERSION 3.27)
+endif()
+# The normalized copy is only needed for this guard; do not leak it.
+unset(katago_preproject_backend)
+
 if(USE_BACKEND STREQUAL "METAL")
   project(katago LANGUAGES CXX Swift)
 else()
@@ -44,7 +63,7 @@ endif()
 set(BUILD_DISTRIBUTED 0 CACHE BOOL "Build with http support for contributing to distributed training")
 set(USE_BACKEND CACHE STRING "Neural net backend")
 string(TOUPPER "${USE_BACKEND}" USE_BACKEND)
-set_property(CACHE USE_BACKEND PROPERTY STRINGS "" CUDA TENSORRT OPENCL EIGEN METAL)
+set_property(CACHE USE_BACKEND PROPERTY STRINGS "" CUDA TENSORRT OPENCL EIGEN MLX METAL)
 
 set(USE_TCMALLOC 0 CACHE BOOL "Use TCMalloc")
 set(NO_GIT_REVISION 0 CACHE BOOL "Disable embedding the git revision into the compiled exe")
@@ -158,8 +177,35 @@ elseif(USE_BACKEND STREQUAL "EIGEN")
   set(NEURALNET_BACKEND_SOURCES
     neuralnet/eigenbackend.cpp
     )
+elseif(USE_BACKEND STREQUAL "MLX")
+  message(STATUS "-DUSE_BACKEND=MLX, using MLX backend for Apple Silicon.")
+
+  if(NOT APPLE)
+    message(FATAL_ERROR "USE_BACKEND=MLX is only supported on macOS. Detected: ${CMAKE_SYSTEM_NAME}")
+  endif()
+  if(CMAKE_OSX_ARCHITECTURES)
+    if(NOT CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
+      message(FATAL_ERROR "USE_BACKEND=MLX requires arm64. Got: ${CMAKE_OSX_ARCHITECTURES}")
+    endif()
+  elseif(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
+    message(FATAL_ERROR "USE_BACKEND=MLX requires Apple Silicon (arm64). Detected: ${CMAKE_SYSTEM_PROCESSOR}")
+  endif()
+
+  set(MLX_MIN_VERSION "0.18")
+  set(MLX_ROOT "" CACHE PATH "Optional path to MLX's CMake package; leave empty to use CMake's default search (e.g. Homebrew's /opt/homebrew/share/cmake/MLX/)")
+
+  # Homebrew installs MLX's CMake config to /opt/homebrew/share/cmake/MLX/, which is
+  # on CMake's default search path. MLX_ROOT, when set, is added as an extra hint.
+  find_package(MLX ${MLX_MIN_VERSION} CONFIG REQUIRED HINTS "${MLX_ROOT}")
+  message(STATUS "Found MLX ${MLX_VERSION} at ${MLX_LIBRARY}")
+
+  set(NEURALNET_BACKEND_SOURCES
+    neuralnet/mlxbackend.cpp
+    neuralnet/mlxwinotuner.cpp
+    neuralnet/mlxtests.cpp
+    )
 elseif(USE_BACKEND STREQUAL "")
-  message(WARNING "${ColorBoldRed}WARNING: Using dummy neural net backend, intended for non-neural-net testing only, will fail on any code path requiring a neural net. To use neural net, specify -DUSE_BACKEND=CUDA or -DUSE_BACKEND=TENSORRT or -DUSE_BACKEND=OPENCL or -DUSE_BACKEND=EIGEN to compile with the respective backend.${ColorReset}")
+  message(WARNING "${ColorBoldRed}WARNING: Using dummy neural net backend, intended for non-neural-net testing only, will fail on any code path requiring a neural net. To use neural net, specify -DUSE_BACKEND=CUDA or -DUSE_BACKEND=TENSORRT or -DUSE_BACKEND=OPENCL or -DUSE_BACKEND=EIGEN or -DUSE_BACKEND=MLX or -DUSE_BACKEND=METAL to compile with the respective backend.${ColorReset}")
   set(NEURALNET_BACKEND_SOURCES neuralnet/dummybackend.cpp)
 else()
   message(FATAL_ERROR "Unrecognized backend: " ${USE_BACKEND})
@@ -496,6 +542,9 @@ elseif(USE_BACKEND STREQUAL "EIGEN")
       message(STATUS "Found Eigen3 at ${EIGEN3_INCLUDE_DIRS}")
     endif()
   endif()
+elseif(USE_BACKEND STREQUAL "MLX")
+  target_compile_definitions(katago PRIVATE USE_MLX_BACKEND)
+  target_link_libraries(katago mlx)
 endif()
 
 if(USE_BIGGER_BOARDS_EXPENSIVE)
 
@@ -267,6 +267,9 @@ int MainCmds::benchmark(const vector<string>& args) {
 #endif
 #ifdef USE_EIGEN_BACKEND
   cout << "You are currently using the Eigen (CPU) version of KataGo. Due to having no GPU, it may be slow." << endl;
+#endif
+#ifdef USE_MLX_BACKEND
+  cout << "Your GTP config is currently set to mlxUseFP16 = " << nnEval->getUsingFP16Mode().toString() << endl;
 #endif
   cout << endl;
   cout << "Your GTP config is currently set to use numSearchThreads = " << params.numThreads << endl;
 
@@ -298,6 +298,18 @@ nnRandomize = true
 # It defaults to min(numAnalysisThreads * numSearchThreadsPerAnalysisThread, numCPUCores).
 # numEigenThreadsPerModel = X
 
+# ------------------------------
+# MLX-specific settings
+# ------------------------------
+# These only apply when using the MLX backend (Apple Silicon).
+
+# Whether to use FP16 (half precision) for neural net evaluation on MLX.
+# FP16 is faster than FP32 on Apple Silicon via the MLX Winograd path.
+# Set `false` for bit-exact FP32 reproducibility.
+#
+# Default: auto (resolves to fp16 on MLX).
+# mlxUseFP16 = auto
+
 
 # Misc Behavior --------------------
 
 
@@ -139,3 +139,15 @@ watchOngoingGameInFileName = watchgame.txt
 # This is the number of CPU threads for evaluating the neural net on the Eigen backend.
 # It defaults to numSearchThreads.
 # numEigenThreadsPerModel = X
+
+# ------------------------------
+# MLX-specific settings
+# ------------------------------
+# These only apply when using the MLX backend (Apple Silicon).
+
+# Whether to use FP16 (half precision) for neural net evaluation on MLX.
+# FP16 is faster than FP32 on Apple Silicon via the MLX Winograd path.
+# Set `false` for bit-exact FP32 reproducibility.
+#
+# Default: auto (resolves to fp16 on MLX).
+# mlxUseFP16 = auto
@@ -539,6 +539,18 @@ searchFactorWhenWinningThreshold = 0.95
 # Default: numSearchThreads
 # numEigenThreadsPerModel = X
 
+# ------------------------------
+# MLX-specific settings
+# ------------------------------
+# These only apply when using the MLX backend (Apple Silicon).
+
+# Whether to use FP16 (half precision) for neural net evaluation on MLX.
+# FP16 is faster than FP32 on Apple Silicon via the MLX Winograd path.
+# Set `false` for bit-exact FP32 reproducibility.
+#
+# Default: auto (resolves to fp16 on MLX).
+# mlxUseFP16 = auto
+
 # ===========================================================================
 # Root move selection and biases
 # ===========================================================================
 
@@ -197,6 +197,18 @@ numNNServerThreadsPerModel = 1
 # It defaults to numSearchThreads.
 # numEigenThreadsPerModel = X
 
+# ------------------------------
+# MLX-specific settings
+# ------------------------------
+# These only apply when using the MLX backend (Apple Silicon).
+
+# Whether to use FP16 (half precision) for neural net evaluation on MLX.
+# FP16 is faster than FP32 on Apple Silicon via the MLX Winograd path.
+# Set `false` for bit-exact FP32 reproducibility.
+#
+# Default: auto (resolves to fp16 on MLX).
+# mlxUseFP16 = auto
+
 
 # Root move selection and biases------------------------------------------------------------------------------
 # Uncomment and edit any of the below values to change them from their default.
 
@@ -246,6 +246,8 @@ string Version::getKataGoVersionFullInfo() {
   out << "Using OpenCL backend" << endl;
 #elif defined(USE_EIGEN_BACKEND)
   out << "Using Eigen(CPU) backend" << endl;
+#elif defined(USE_MLX_BACKEND)
+  out << "Using MLX backend" << endl;
 #else
   out << "Using dummy backend" << endl;
 #endif
@@ -282,6 +284,8 @@ string Version::getGitRevisionWithBackend() {
   s += "-opencl";
 #elif defined(USE_EIGEN_BACKEND)
   s += "-eigen";
+#elif defined(USE_MLX_BACKEND)
+  s += "-mlx";
 #else
   s += "-dummy";
 #endif