Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/build_kernel_macos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@ on:
jobs:
build:
name: Build kernel
runs-on: macos-latest
runs-on: macos-26
steps:
- name: "Select Xcode"
run: sudo xcrun xcode-select -s /Applications/Xcode_26.0.app
- name: "Install Metal Toolchain"
run: xcodebuild -downloadComponent metalToolchain
- uses: actions/checkout@v4
- uses: cachix/install-nix-action@v31
- uses: cachix/cachix-action@v15
Expand Down
56 changes: 35 additions & 21 deletions build2cmake/src/templates/metal/compile-metal.cmake
Original file line number Diff line number Diff line change
@@ -1,11 +1,25 @@
# Metal shader compilation function
function(compile_metal_shaders TARGET_NAME METAL_SOURCES EXTRA_INCLUDE_DIRS)
# Find the Metal compiler
find_program(METAL_COMPILER xcrun REQUIRED)

# Locate the Metal toolchain unless the caller already supplied its path,
# e.g. with -DMETAL_TOOLCHAIN=<search path>/Metal.xctoolchain.
if(NOT DEFINED METAL_TOOLCHAIN)
  # Ask Xcode where the MetalToolchain component is installed. stdout carries
  # the component description, stderr is kept for the error message below.
  execute_process(
    COMMAND "xcodebuild" "-showComponent" "MetalToolchain"
    OUTPUT_VARIABLE FIND_METAL_OUT
    RESULT_VARIABLE FIND_METAL_ERROR_CODE
    ERROR_VARIABLE FIND_METAL_STDERR
    OUTPUT_STRIP_TRAILING_WHITESPACE)

  # A non-zero result (or a non-numeric one, when the command could not be
  # started at all) means the query itself failed.
  if(NOT FIND_METAL_ERROR_CODE EQUAL 0)
    message(FATAL_ERROR
      "Cannot find the Metal toolchain ('xcodebuild -showComponent MetalToolchain' failed with '${FIND_METAL_ERROR_CODE}'): ${FIND_METAL_STDERR}")
  endif()

  # Extract the Toolchain Search Path value and append Metal.xctoolchain
  string(REGEX MATCH "Toolchain Search Path: ([^\n]+)" MATCH_RESULT "${FIND_METAL_OUT}")

  # Without this check a missing "Toolchain Search Path" line would silently
  # yield "/Metal.xctoolchain" and only fail later, at shader compile time.
  if("${MATCH_RESULT}" STREQUAL "")
    message(FATAL_ERROR
      "Cannot find the Metal toolchain: no 'Toolchain Search Path' in the output of 'xcodebuild -showComponent MetalToolchain'. Install it with: xcodebuild -downloadComponent MetalToolchain")
  endif()

  set(METAL_TOOLCHAIN "${CMAKE_MATCH_1}/Metal.xctoolchain")

  # Fail at configure time rather than with an obscure "command not found"
  # from the custom commands that invoke ${METAL_TOOLCHAIN}/usr/bin/metal.
  if(NOT IS_DIRECTORY "${METAL_TOOLCHAIN}")
    message(FATAL_ERROR
      "Metal toolchain directory does not exist: ${METAL_TOOLCHAIN}. Install it with: xcodebuild -downloadComponent MetalToolchain")
  endif()
endif()

# Set Metal compiler flags
set(METAL_FLAGS "-std=metal3.2" "-O2")
set(METAL_FLAGS "-std=metal4.0" "-O2")

# Output directory for compiled metallib
set(METALLIB_OUTPUT_DIR "${CMAKE_BINARY_DIR}/metallib")
file(MAKE_DIRECTORY ${METALLIB_OUTPUT_DIR})
Expand All @@ -18,73 +32,73 @@ function(compile_metal_shaders TARGET_NAME METAL_SOURCES EXTRA_INCLUDE_DIRS)
set(AIR_FILES)
set(METAL_FILES)
set(HEADER_FILES)

foreach(SOURCE_FILE ${METAL_SOURCES})
if(SOURCE_FILE MATCHES "\\.metal$")
list(APPEND METAL_FILES ${SOURCE_FILE})
elseif(SOURCE_FILE MATCHES "\\.h$")
list(APPEND HEADER_FILES ${SOURCE_FILE})
endif()
endforeach()

foreach(METAL_FILE ${METAL_FILES})
get_filename_component(METAL_NAME ${METAL_FILE} NAME_WE)
set(AIR_FILE "${CMAKE_BINARY_DIR}/${METAL_NAME}.air")

# Include header files as dependencies
set(ALL_DEPENDENCIES ${CMAKE_CURRENT_SOURCE_DIR}/${METAL_FILE})
foreach(HEADER_FILE ${HEADER_FILES})
list(APPEND ALL_DEPENDENCIES ${CMAKE_CURRENT_SOURCE_DIR}/${HEADER_FILE})
endforeach()

add_custom_command(
OUTPUT ${AIR_FILE}
COMMAND ${METAL_COMPILER} -sdk macosx metal ${METAL_FLAGS}
COMMAND "${METAL_TOOLCHAIN}/usr/bin/metal" ${METAL_FLAGS}
-c ${CMAKE_CURRENT_SOURCE_DIR}/${METAL_FILE}
-o ${AIR_FILE}
DEPENDS ${ALL_DEPENDENCIES}
COMMENT "Compiling Metal shader ${METAL_FILE} to ${AIR_FILE}"
VERBATIM
)

list(APPEND AIR_FILES ${AIR_FILE})
endforeach()

# Link all .air files into a single .metallib
set(METALLIB_FILE "${METALLIB_OUTPUT_DIR}/${TARGET_NAME}.metallib")
add_custom_command(
OUTPUT ${METALLIB_FILE}
COMMAND ${METAL_COMPILER} -sdk macosx metallib ${AIR_FILES}
COMMAND "${METAL_TOOLCHAIN}/usr/bin/metallib" ${AIR_FILES}
-o ${METALLIB_FILE}
DEPENDS ${AIR_FILES}
COMMENT "Linking Metal library ${METALLIB_FILE}"
VERBATIM
)

# Generate C++ header with embedded metallib data
set(METALLIB_HEADER "${CMAKE_BINARY_DIR}/${TARGET_NAME}_metallib.h")
set(METALLIB_TO_HEADER_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/cmake/metallib_to_header.py")

add_custom_command(
OUTPUT ${METALLIB_HEADER}
COMMAND ${Python3_EXECUTABLE} ${METALLIB_TO_HEADER_SCRIPT} ${METALLIB_FILE} ${METALLIB_HEADER} ${TARGET_NAME}
DEPENDS ${METALLIB_FILE} ${METALLIB_TO_HEADER_SCRIPT}
COMMENT "Generating embedded Metal library header ${METALLIB_HEADER}"
VERBATIM
)

# Create a custom target for the metallib
add_custom_target(${TARGET_NAME}_metallib ALL DEPENDS ${METALLIB_FILE} ${METALLIB_HEADER})

# Add dependency to main target
add_dependencies(${TARGET_NAME} ${TARGET_NAME}_metallib)

# Add the generated header to include directories
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_BINARY_DIR})

# Pass the metallib header and namespace as compile definitions
target_compile_definitions(${TARGET_NAME} PRIVATE
target_compile_definitions(${TARGET_NAME} PRIVATE
EMBEDDED_METALLIB_HEADER="${TARGET_NAME}_metallib.h"
EMBEDDED_METALLIB_NAMESPACE=${TARGET_NAME}_metal
)
endfunction()
endfunction()
2 changes: 1 addition & 1 deletion build2cmake/src/templates/metal/preamble.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.26)
project({{name}} LANGUAGES CXX C OBJC OBJCXX)

set(CMAKE_OSX_DEPLOYMENT_TARGET "15.0" CACHE STRING "Minimum macOS deployment version")
set(CMAKE_OSX_DEPLOYMENT_TARGET "26.0" CACHE STRING "Minimum macOS deployment version")

install(CODE "set(CMAKE_INSTALL_LOCAL_ONLY TRUE)" ALL_COMPONENTS)

Expand Down
12 changes: 6 additions & 6 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 24 additions & 4 deletions lib/torch-extension/arch.nix
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
build2cmake,
cmake,
cmakeNvccThreadsHook,
cuda_nvcc,
get-kernel-check,
kernel-abi-check,
ninja,
Expand All @@ -24,7 +25,7 @@
xpuPackages,

# Build inputs
apple-sdk_15,
apple-sdk_26,
clr,
oneapi-torch-dev,
onednn-xpu,
Expand Down Expand Up @@ -96,6 +97,24 @@ stdenv.mkDerivation (prevAttrs: {
} --ops-id ${rev} build.toml
'';

preConfigure =
# This is a workaround for https://openradar.appspot.com/FB20389216 - even
# if the user downloaded the Metal toolchain, the mapping is not set up
# for the Nix build users. To make things worse, we cannot set up a mapping
# because the Nix build users do not have a writable home directory and
# showComponent/downloadComponent do not respect the HOME variable. So
# instead, we'll use showComponent (which will emit a lot of warnings due
# to the above) to grab the path of the Metal toolchain.
lib.optionalString metalSupport ''
METAL_PATH=$(${xcrunHost}/bin/xcrunHost xcodebuild -showComponent MetalToolchain 2> /dev/null | sed -rn "s/Toolchain Search Path: (.*)/\1/p")
if [ ! -d "$METAL_PATH" ]; then
>&2 echo "Cannot find Metal toolchain, use: xcodebuild -downloadComponent MetalToolchain"
exit 1
fi
Comment on lines +108 to +113
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the user building the kernel doesn't have MetalToolchain installed, would it be better to use downloadComponent to fetch the toolchain and get its path, or to just raise an error?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not very keen on modifying the user's development environment outside the sandbox. Also, it will only result in half an install, because the toolchain mapping for the user is not set up (since the build runs as a non-privileged user).


cmakeFlagsArray+=("-DMETAL_TOOLCHAIN=$METAL_PATH/Metal.xctoolchain")
'';

# hipify copies files, but its target is run in the CMake build and install
# phases. Since some of the files come from the Nix store, this fails the
# second time around.
Expand All @@ -115,7 +134,7 @@ stdenv.mkDerivation (prevAttrs: {
]
++ lib.optionals cudaSupport [
cmakeNvccThreadsHook
cudaPackages.cuda_nvcc
cuda_nvcc
]
++ lib.optionals rocmSupport [
clr
Expand Down Expand Up @@ -160,7 +179,7 @@ stdenv.mkDerivation (prevAttrs: {
onednn-xpu
])
++ lib.optionals stdenv.hostPlatform.isDarwin [
apple-sdk_15
apple-sdk_26
]
++ extraDeps;

Expand Down Expand Up @@ -197,7 +216,8 @@ stdenv.mkDerivation (prevAttrs: {
]
++ lib.optionals metalSupport [
# Use host compiler for Metal. Not included in the redistributable SDK.
(lib.cmakeFeature "METAL_COMPILER" "${xcrunHost}/bin/xcrunHost")
# Re-enable when the issue mentioned in preConfigure is solved.
#(lib.cmakeFeature "METAL_COMPILER" "${xcrunHost}/bin/xcrunHost")
];

postInstall = ''
Expand Down
8 changes: 8 additions & 0 deletions lib/torch-extension/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
callPackage,
stdenv,
stdenvGlibc_2_27,
cudaPackages,
rocmPackages,
writeScriptBin,
xpuPackages,
Expand All @@ -28,6 +29,12 @@ let
}
);

cuda_nvcc = cudaPackages.cuda_nvcc.override {
backendStdenv = cudaPackages.backendStdenv.override {
stdenv = effectiveStdenv;
};
};

oneapi-torch-dev = xpuPackages.oneapi-torch-dev.override { stdenv = effectiveStdenv; };
onednn-xpu = xpuPackages.onednn-xpu.override {
inherit oneapi-torch-dev;
Expand All @@ -45,6 +52,7 @@ in
mkExtension = callPackage ./arch.nix {
inherit
clr
cuda_nvcc
oneapi-torch-dev
onednn-xpu
torch
Expand Down
8 changes: 3 additions & 5 deletions pkgs/stdenv-glibc-2_27/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
cudaSupport ? config.cudaSupport,
fetchFromGitHub,
overrideCC,
system,
wrapBintoolsWith,
wrapCCWith,
gcc12Stdenv,
gcc13Stdenv,
stdenv,
bintools-unwrapped,
cudaPackages,
Expand All @@ -19,7 +18,7 @@ let
repo = "nixpkgs";
rev = "a9eb3eed170fa916e0a8364e5227ee661af76fde";
hash = "sha256-1ycrr9HMrGA3ZDM8qmKcZICBupE5UShnIIhPRWdvAzA=";
}) { inherit system; };
}) { inherit (stdenv.hostPlatform) system; };

glibc_2_27 = nixpkgs_20191230.glibc.overrideAttrs (prevAttrs: {
# Slight adjustments for compatibility with modern nixpkgs:
Expand Down Expand Up @@ -64,10 +63,9 @@ let
bintools = bintools-unwrapped;
libc = newGlibc;
};
libcxx = cc.lib;
};
in
overrideCC stdenv compilerWrapped;

in
stdenvWith glibc_2_27 (if cudaSupport then cudaPackages.backendStdenv else gcc12Stdenv).cc.cc stdenv
stdenvWith glibc_2_27 (if cudaSupport then cudaPackages.backendStdenv else gcc13Stdenv).cc.cc stdenv
20 changes: 10 additions & 10 deletions versions.nix
Original file line number Diff line number Diff line change
Expand Up @@ -137,14 +137,14 @@
}

# Non-standard versions; not included in bundle builds.
{
torchVersion = "2.8";
cudaVersion = "12.4";
cxx11Abi = true;
systems = [
"x86_64-linux"
"aarch64-linux"
];
sourceBuild = true;
}
#{
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Disabled, because we are now on CUDA 12.6+.

# torchVersion = "2.8";
# cudaVersion = "12.4";
# cxx11Abi = true;
# systems = [
# "x86_64-linux"
# "aarch64-linux"
# ];
# sourceBuild = true;
#}
Comment on lines +140 to +149
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this expected ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, see my comment above. We now only have CUDA 12.6 and up. Which means we finally have to make FA3 work with a newer CUDA version.

]
Loading