flashinfer-ai
diff --git a/‎CMakeLists.txt
+94-409 b/‎CMakeLists.txt
+94-409
diff --git a/‎cmake/Components.cmake
+24 b/‎cmake/Components.cmake
+24
diff --git a/‎cmake/Dependencies.cmake
+146 b/‎cmake/Dependencies.cmake
+146
diff --git a/‎cmake/Options.cmake
+86 b/‎cmake/Options.cmake
+86
diff --git a/‎cmake/flashinferConfig.cmake.in
+72 b/‎cmake/flashinferConfig.cmake.in
+72
@@ -0,0 +1,24 @@
+# Assemble FLASHINFER_COMPONENTS, the list of components enabled for this
+# build. "Headers" is always part of the list; the remaining entries depend on
+# the FLASHINFER_* switches tested below.
+set(FLASHINFER_COMPONENTS Headers)
+
+if(FLASHINFER_BUILD_KERNELS)
+  list(APPEND FLASHINFER_COMPONENTS Kernels)
+endif()
+
+# TVMBinding is only listed when the kernels are built as well
+if(FLASHINFER_BUILD_KERNELS AND FLASHINFER_TVM_BINDING)
+  list(APPEND FLASHINFER_COMPONENTS TVMBinding)
+endif()
+
+if(FLASHINFER_DISTRIBUTED)
+  list(APPEND FLASHINFER_COMPONENTS Distributed)
+endif()
+
+# Register the preprocessor definition FLASHINFER_COMPONENT_<component> for the
+# current directory.
+#
+# Arguments:
+#   component - name appended verbatim to the FLASHINFER_COMPONENT_ prefix
+#
+# Example:
+#   add_component_flags(Kernels)  # defines FLASHINFER_COMPONENT_Kernels
+#
+# Implemented as a function (not a macro) so the argument is a real, locally
+# scoped variable; the definition itself is still recorded on the directory.
+function(add_component_flags component)
+  add_compile_definitions(FLASHINFER_COMPONENT_${component})
+endfunction()
+
+# Emit one compile-time definition per enabled component
+foreach(comp IN LISTS FLASHINFER_COMPONENTS)
+  add_component_flags(${comp})
+endforeach()
@@ -0,0 +1,146 @@
+# === Required Dependencies for Core Functionality ===
+find_package(CUDAToolkit REQUIRED)
+
+# Python3 is required to generate the kernel sources. REQUIRED already aborts
+# the configure step when it cannot be found, so no separate Python3_FOUND
+# check is needed afterwards.
+find_package(Python3 REQUIRED)
+
+find_package(Thrust REQUIRED)
+
+# === Test Dependencies ===
+# GoogleTest is only set up when the unit tests are enabled.
+if(FLASHINFER_UNITTESTS)
+  include(FetchContent)
+
+  # Pinned to the release-1.16.0 commit. FIND_PACKAGE_ARGS allows
+  # FetchContent_MakeAvailable() to try find_package() with the GTest name
+  # before falling back to the git checkout.
+  FetchContent_Declare(
+    googletest
+    GIT_REPOSITORY https://github.com/google/googletest.git
+    GIT_TAG 6910c9d9165801d8827d628cb72eb7ea9dd538c5
+    FIND_PACKAGE_ARGS NAMES GTest
+  )
+  FetchContent_MakeAvailable(googletest)
+endif()
+
+# === Benchmark Dependencies ===
+# NVBench (GPU benchmarking) is only set up when the C++ benchmarks are
+# enabled.
+if(FLASHINFER_CXX_BENCHMARKS)
+  include(FetchContent)
+
+  # Pinned to a fixed commit of the NVBench repository
+  FetchContent_Declare(
+    nvbench
+    GIT_REPOSITORY https://github.com/NVIDIA/nvbench.git
+    GIT_TAG c03033b50e46748207b27685b1cdfcbe4a2fec59
+  )
+  FetchContent_MakeAvailable(nvbench)
+endif()
+
+# === Boost Dependency for FP16 QK Reductions ===
+# USE_FP16_QK_REDUCTIONS ends up as the string "true" or "false", following
+# FLASHINFER_GEN_USE_FP16_QK_REDUCTIONS.
+if(NOT FLASHINFER_GEN_USE_FP16_QK_REDUCTIONS)
+  set(USE_FP16_QK_REDUCTIONS "false")
+else()
+  # Boost.Math is fetched only when the option is switched on.
+  # NOTE(review): no GIT_TAG is given here, unlike the other FetchContent
+  # declarations in this file -- consider pinning a revision.
+  include(FetchContent)
+  set(BOOST_ENABLE_CMAKE ON)
+  FetchContent_Declare(
+    boost_math
+    GIT_REPOSITORY https://github.com/boostorg/math.git
+  )
+  FetchContent_MakeAvailable(boost_math)
+
+  set(USE_FP16_QK_REDUCTIONS "true")
+endif()
+message(STATUS "USE_FP16_QK_REDUCTIONS=${USE_FP16_QK_REDUCTIONS}")
+
+# === Distributed component dependencies ===
+# Needed by the distributed component as well as by the distributed unit tests.
+if(FLASHINFER_DISTRIBUTED OR FLASHINFER_DIST_UNITTESTS)
+  include(FetchContent)
+  FetchContent_Declare(
+    mscclpp
+    GIT_REPOSITORY https://github.com/microsoft/mscclpp.git
+    GIT_TAG 11e62024d3eb190e005b4689f8c8443d91a6c82e)
+  FetchContent_MakeAvailable(mscclpp)
+
+  # Create alias for distributed component
+  if(NOT TARGET flashinfer::mscclpp)
+    add_library(flashinfer::mscclpp ALIAS mscclpp)
+  endif()
+
+  # Fetch spdlog for distributed tests (header-only usage). FIND_PACKAGE_ARGS
+  # is intentionally not declared: it is only honoured by
+  # FetchContent_MakeAvailable(), which is not used for spdlog.
+  FetchContent_Declare(
+    spdlog
+    GIT_REPOSITORY https://github.com/gabime/spdlog.git
+    GIT_TAG f355b3d58f7067eee1706ff3c801c2361011f3d5 # release-1.15.1
+  )
+
+  # Use Populate instead of MakeAvailable since we only need the headers.
+  # FetchContent_Populate() fails when the content has already been populated
+  # in this build, so query the state first and reuse spdlog_SOURCE_DIR then.
+  FetchContent_GetProperties(spdlog)
+  if(NOT spdlog_POPULATED)
+    FetchContent_Populate(spdlog)
+  endif()
+
+  # Set the include directory for later use
+  set(SPDLOG_INCLUDE_DIR "${spdlog_SOURCE_DIR}/include")
+  message(STATUS "Using spdlog from ${SPDLOG_INCLUDE_DIR}")
+
+  find_package(MPI REQUIRED)
+endif()
+
+# === FP8 Dependencies ===
+# Set up everything the FP8 tests/benchmarks need: an SM90+ target
+# architecture, PyTorch, and the Flash Attention 3 sources.
+if(FLASHINFER_FP8_TESTS OR FLASHINFER_FP8_BENCHMARKS)
+  # Verify that every requested CUDA architecture is SM90 or higher. Entries
+  # may carry a suffix (e.g. "90a" or "90-real"); only the leading number is
+  # compared. An empty list or a non-numeric entry cannot be verified and is
+  # rejected as well.
+  set(_fp8_archs_ok TRUE)
+  if(NOT CMAKE_CUDA_ARCHITECTURES)
+    set(_fp8_archs_ok FALSE)
+  endif()
+  foreach(_fp8_arch IN LISTS CMAKE_CUDA_ARCHITECTURES)
+    if(NOT _fp8_arch MATCHES "^([0-9]+)")
+      set(_fp8_archs_ok FALSE)
+    elseif(CMAKE_MATCH_1 LESS 90)
+      set(_fp8_archs_ok FALSE)
+    endif()
+  endforeach()
+  if(NOT _fp8_archs_ok)
+    message(
+      FATAL_ERROR "FP8 tests/benchmarks require SM90 or higher architecture")
+  endif()
+  unset(_fp8_arch)
+  unset(_fp8_archs_ok)
+
+  # Find PyTorch which is required for FP8 features. REQUIRED already aborts
+  # the configure step when Torch is missing.
+  find_package(Torch REQUIRED)
+  message(STATUS "Found PyTorch: ${TORCH_INCLUDE_DIRS}")
+
+  # Fetch Flash Attention repository with specific commit
+  include(FetchContent)
+  FetchContent_Declare(
+    flash_attention
+    GIT_REPOSITORY https://github.com/Dao-AILab/flash-attention.git
+    GIT_TAG 29ef580560761838c0e9e82bc0e98d04ba75f949)
+
+  # Only the sources are needed. FetchContent_Populate() fails when the
+  # content has already been populated in this build, so check first.
+  FetchContent_GetProperties(flash_attention)
+  if(NOT flash_attention_POPULATED)
+    FetchContent_Populate(flash_attention)
+  endif()
+
+  # Set Flash Attention 3 include directory
+  set(FA3_INCLUDE_DIR "${flash_attention_SOURCE_DIR}/csrc/flash_attn/hopper")
+  message(STATUS "Flash Attention 3 source directory: ${FA3_INCLUDE_DIR}")
+
+  # Collect the Flash Attention 3 forward-kernel sources into FA3_IMPL_FILES
+  file(GLOB FA3_IMPL_FILES "${FA3_INCLUDE_DIR}/flash_fwd_*.cu")
+endif()
+
+# === TVM Binding dependencies ===
+if(FLASHINFER_TVM_BINDING)
+  # Resolve TVM source directory into TVM_SOURCE_DIR_SET. Precedence:
+  #   1. the FLASHINFER_TVM_SOURCE_DIR option
+  #   2. the TVM_SOURCE_DIR environment variable
+  #   3. the TVM_HOME environment variable
+  # A source that is unset or empty is skipped, so an empty environment
+  # variable can no longer be picked up as a (useless) directory.
+  if(NOT "${FLASHINFER_TVM_SOURCE_DIR}" STREQUAL "")
+    set(TVM_SOURCE_DIR_SET "${FLASHINFER_TVM_SOURCE_DIR}")
+  elseif(NOT "$ENV{TVM_SOURCE_DIR}" STREQUAL "")
+    set(TVM_SOURCE_DIR_SET "$ENV{TVM_SOURCE_DIR}")
+  elseif(NOT "$ENV{TVM_HOME}" STREQUAL "")
+    set(TVM_SOURCE_DIR_SET "$ENV{TVM_HOME}")
+  else()
+    message(
+      FATAL_ERROR
+        "TVM source directory not found. Set FLASHINFER_TVM_SOURCE_DIR.")
+  endif()
+endif()
+
+# === CUTLASS Configuration ===
+# CUTLASS is mandatory: stop right away when no location was given.
+if(NOT FLASHINFER_CUTLASS_DIR)
+  message(
+    FATAL_ERROR "FLASHINFER_CUTLASS_DIR must be set to the path of CUTLASS")
+endif()
+
+# Add the CUTLASS location to the package search prefixes
+list(APPEND CMAKE_PREFIX_PATH ${FLASHINFER_CUTLASS_DIR})
+
+# Add CUTLASS include directories directly
+include_directories(
+  ${FLASHINFER_CUTLASS_DIR}/include
+  ${FLASHINFER_CUTLASS_DIR}/tools/util/include)
+
+message(STATUS "Using CUTLASS from ${FLASHINFER_CUTLASS_DIR}")
@@ -0,0 +1,86 @@
+# cmake-format: off
+# NOTE:
+# a) Do not modify this file to change option values. Options should be
+#    configured using either a config.cmake file (refer to the default file
+#    inside the cmake folder), or by setting the required -DFLASHINFER_XXX
+#    option on the command line.
+#
+# b) This file should only contain option definitions and should not contain
+#    any other CMake commands.
+#
+# c) All new options should be defined here with a default value and a short
+#    description.
+#
+# d) Add new options under the appropriate section.
+
+# === COMPONENT OPTIONS ===
+# Optional parts of the build; each one is OFF unless requested.
+flashinfer_option(FLASHINFER_BUILD_KERNELS
+  "Build and install kernel libraries" OFF)
+flashinfer_option(FLASHINFER_TVM_BINDING
+  "Build TVM binding support" OFF)
+flashinfer_option(FLASHINFER_DISTRIBUTED
+  "Build distributed support" OFF)
+
+# === DATA TYPE OPTIONS ===
+# Data types to support; each one is ON by default.
+flashinfer_option(FLASHINFER_ENABLE_FP8
+  "Enable FP8 data type support" ON)
+flashinfer_option(FLASHINFER_ENABLE_FP8_E4M3
+  "Enable FP8 E4M3 format specifically" ON)
+flashinfer_option(FLASHINFER_ENABLE_FP8_E5M2
+  "Enable FP8 E5M2 format specifically" ON)
+flashinfer_option(FLASHINFER_ENABLE_F16
+  "Enable F16 data type support" ON)
+flashinfer_option(FLASHINFER_ENABLE_BF16
+  "Enable BF16 data type support" ON)
+
+# === CODE GENERATION OPTIONS ===
+# The first three options take a list of values rather than a single switch.
+flashinfer_option(FLASHINFER_GEN_HEAD_DIMS
+  "Head dimensions to enable" 64 128 256)
+flashinfer_option(FLASHINFER_GEN_POS_ENCODING_MODES
+  "Position encoding modes to enable" 0 1 2)
+flashinfer_option(FLASHINFER_GEN_MASK_MODES
+  "Mask modes to enable" 0 1 2)
+# FIXME: CAUTION!!! Turning on the following option will cause build failures.
+# (refer #806, #936)
+flashinfer_option(FLASHINFER_GEN_USE_FP16_QK_REDUCTIONS
+  "Use FP16 for QK reductions" OFF)
+
+# === BUILD TYPE OPTIONS ===
+# Test and benchmark builds; each one is OFF unless requested.
+flashinfer_option(FLASHINFER_UNITTESTS
+  "Build unit tests" OFF)
+flashinfer_option(FLASHINFER_CXX_BENCHMARKS
+  "Build benchmarks" OFF)
+flashinfer_option(FLASHINFER_DIST_UNITTESTS
+  "Build distributed unit tests" OFF)
+
+# === FEATURE-SPECIFIC TESTS/BENCHMARKS ===
+flashinfer_option(FLASHINFER_FP8_TESTS
+  "Build FP8 tests" OFF)
+flashinfer_option(FLASHINFER_FP8_BENCHMARKS
+  "Build FP8 benchmarks" OFF)
+
+# === ARCHITECTURE OPTIONS ===
+# Empty by default
+flashinfer_option(FLASHINFER_CUDA_ARCHITECTURES
+  "CUDA architectures to compile for" "")
+
+# === PATH OPTIONS ===
+# Both paths are empty by default
+flashinfer_option(FLASHINFER_CUTLASS_DIR
+  "Path to CUTLASS installation" "")
+flashinfer_option(FLASHINFER_TVM_SOURCE_DIR
+  "Path to TVM source directory" "")
+
+# === AUTO-DERIVED OPTIONS ===
+# Values below are derived from the options declared above. Several of them
+# overwrite the cached option with FORCE.
+
+# Handle CUDA architectures: a non-empty FLASHINFER_CUDA_ARCHITECTURES replaces
+# CMAKE_CUDA_ARCHITECTURES.
+if(FLASHINFER_CUDA_ARCHITECTURES)
+  message(STATUS "CMAKE_CUDA_ARCHITECTURES set to ${FLASHINFER_CUDA_ARCHITECTURES}.")
+  set(CMAKE_CUDA_ARCHITECTURES ${FLASHINFER_CUDA_ARCHITECTURES})
+endif()
+
+# Handle automatic enabling of dependent features
+if(FLASHINFER_FP8_TESTS)
+  set(FLASHINFER_UNITTESTS ON CACHE BOOL "Tests enabled for FP8" FORCE)
+endif()
+
+if(FLASHINFER_FP8_BENCHMARKS)
+  set(FLASHINFER_CXX_BENCHMARKS ON CACHE BOOL "Benchmarks enabled for FP8" FORCE)
+endif()
+
+if(FLASHINFER_DIST_UNITTESTS)
+  set(FLASHINFER_UNITTESTS ON CACHE BOOL "Tests enabled for distributed" FORCE)
+endif()
+
+if(FLASHINFER_TVM_BINDING AND NOT FLASHINFER_BUILD_KERNELS)
+  message(FATAL_ERROR "TVM binding requires FLASHINFER_BUILD_KERNELS to be ON")
+endif()
+
+# Ensure FP8 is enabled for FP8 tests/benchmarks. This has to run before the
+# FLASHINFER_ENABLE_FP8 check below: otherwise the FP8 formats are derived from
+# the old value on the first configure run and from the forced value on the
+# next one, giving different cache contents for the same inputs.
+if(FLASHINFER_FP8_TESTS OR FLASHINFER_FP8_BENCHMARKS)
+  set(FLASHINFER_ENABLE_FP8 ON CACHE BOOL "FP8 enabled for tests/benchmarks" FORCE)
+  set(FLASHINFER_ENABLE_FP8_E4M3 ON CACHE BOOL "FP8_E4M3 enabled for tests/benchmarks" FORCE)
+endif()
+
+if(FLASHINFER_ENABLE_FP8)
+  # Enable both FP8 formats when FP8 is enabled
+  set(FLASHINFER_ENABLE_FP8_E4M3 ON CACHE BOOL "Enable FP8 E4M3 format" FORCE)
+  set(FLASHINFER_ENABLE_FP8_E5M2 ON CACHE BOOL "Enable FP8 E5M2 format" FORCE)
+endif()
+# cmake-format: on
@@ -0,0 +1,72 @@
+@PACKAGE_INIT@
+
+include(CMakeFindDependencyMacro)
+# Required dependencies for FlashInfer headers.
+# find_dependency() forwards the REQUIRED/QUIET flags of the enclosing
+# find_package(flashinfer ...) call on its own, so they are not repeated here;
+# hard-coding REQUIRED would turn an optional lookup of flashinfer into a
+# fatal error whenever a dependency is missing.
+find_dependency(CUDAToolkit)
+
+# Optional dependencies based on components
+if("Distributed" IN_LIST flashinfer_FIND_COMPONENTS)
+  find_dependency(MPI)
+  find_dependency(mscclpp)
+endif()
+
+# Define available components passed from main CMakeLists.txt
+set(_flashinfer_available_components @FLASHINFER_COMPONENTS@)
+
+# Initialize component found status: every known component starts out as
+# "not found" ...
+foreach(_comp IN ITEMS Headers Kernels TVMBinding Distributed)
+  set(flashinfer_${_comp}_FOUND FALSE)
+endforeach()
+
+# ... and the ones that were enabled when the package was built are flipped to
+# "found".
+foreach(_comp IN LISTS _flashinfer_available_components)
+  set(flashinfer_${_comp}_FOUND TRUE)
+endforeach()
+
+# Check for requested components. flashinfer_FIND_COMPONENTS also lists the
+# components requested through OPTIONAL_COMPONENTS; only components whose
+# flashinfer_FIND_REQUIRED_<comp> flag is set may make the package fail.
+foreach(_comp IN LISTS flashinfer_FIND_COMPONENTS)
+  if(flashinfer_FIND_REQUIRED_${_comp}
+     AND NOT _comp IN_LIST _flashinfer_available_components)
+    set(flashinfer_FOUND False)
+    set(flashinfer_NOT_FOUND_MESSAGE "Requested component: ${_comp} is not available")
+    return()
+  endif()
+endforeach()
+
+# Headers component is always available and included
+set(flashinfer_Headers_FOUND TRUE)
+
+# Kernel targets: loaded when the Kernels component is available and either no
+# component list was given or "Kernels" is part of it. The include is OPTIONAL,
+# so a missing targets file is silently skipped.
+if(flashinfer_Kernels_FOUND)
+  if(NOT flashinfer_FIND_COMPONENTS OR "Kernels" IN_LIST flashinfer_FIND_COMPONENTS)
+    include("${CMAKE_CURRENT_LIST_DIR}/libflashinferTargets.cmake" OPTIONAL)
+  endif()
+endif()
+
+# TVM binding targets: loaded only on explicit request. Also OPTIONAL.
+if(flashinfer_TVMBinding_FOUND)
+  if("TVMBinding" IN_LIST flashinfer_FIND_COMPONENTS)
+    include("${CMAKE_CURRENT_LIST_DIR}/flashinferTVMBindingTargets.cmake" OPTIONAL)
+  endif()
+endif()
+
+# Distributed targets: loaded only on explicit request.
+if(flashinfer_Distributed_FOUND)
+  if("Distributed" IN_LIST flashinfer_FIND_COMPONENTS)
+    # Provide flashinfer::dist as an imported interface target that links
+    # mscclpp, unless a target of that name already exists
+    if(NOT TARGET flashinfer::dist)
+      add_library(flashinfer::dist INTERFACE IMPORTED)
+      set_property(TARGET flashinfer::dist
+                   PROPERTY INTERFACE_LINK_LIBRARIES mscclpp)
+    endif()
+
+    # Unlike the two includes above this one is not OPTIONAL: a missing file
+    # is an error
+    include("${CMAKE_CURRENT_LIST_DIR}/flashinferDistTargets.cmake")
+  endif()
+endif()
+
+# Publish the header location below the package prefix
+set(FLASHINFER_INCLUDE_DIRS "${PACKAGE_PREFIX_DIR}/include")
+
+# Report the package location, version and component list unless the caller
+# asked for a quiet lookup
+if(NOT flashinfer_FIND_QUIETLY)
+  message(
+    STATUS
+      "Found flashinfer: ${PACKAGE_PREFIX_DIR} (version: ${flashinfer_VERSION})"
+  )
+  message(
+    STATUS
+      "Available components: ${_flashinfer_available_components}"
+  )
+endif()
+
+# Final check of the requested components against the flashinfer_<comp>_FOUND
+# variables
+check_required_components(flashinfer)