diff --git a/host-configs/tuo-local-hsmp.cmake b/host-configs/tuo-local-hsmp.cmake new file mode 100644 index 000000000..eb8657b1d --- /dev/null +++ b/host-configs/tuo-local-hsmp.cmake @@ -0,0 +1,34 @@ +# Copyright 2019-2023 Lawrence Livermore National Security, LLC and other +# Variorum Project Developers. See the top-level LICENSE file for details. +# +# SPDX-License-Identifier: MIT + +# C compiler (GCC 13.3.1 under /usr/tce/packages) +set(CMAKE_C_COMPILER "/usr/tce/packages/gcc/gcc-13.3.1/bin/gcc" CACHE PATH "") + +# C++ compiler (GCC 13.3.1 under /usr/tce/packages) +set(CMAKE_CXX_COMPILER "/usr/tce/packages/gcc/gcc-13.3.1/bin/g++" CACHE PATH "") + +# Fortran compiler (GCC 13.3.1 under /usr/tce/packages) +set(CMAKE_Fortran_COMPILER "/usr/tce/packages/gcc/gcc-13.3.1/bin/gfortran" CACHE PATH "") + +set(ENABLE_MPI OFF CACHE BOOL "") +set(ENABLE_OPENMP OFF CACHE BOOL "") + +set(BUILD_TESTS OFF CACHE BOOL "") + +set(VARIORUM_DEBUG OFF CACHE BOOL "") + +set(VARIORUM_WITH_AMD_CPU ON CACHE BOOL "") +set(VARIORUM_WITH_AMD_GPU OFF CACHE BOOL "") +set(VARIORUM_WITH_ARM_CPU OFF CACHE BOOL "") +set(VARIORUM_WITH_IBM_CPU OFF CACHE BOOL "") +set(VARIORUM_WITH_INTEL_CPU OFF CACHE BOOL "") +set(VARIORUM_WITH_INTEL_GPU OFF CACHE BOOL "") +set(VARIORUM_WITH_NVIDIA_GPU OFF CACHE BOOL "") + +# Path to the e_smi_library install +set(ESMI_DIR "/usr/workspace/msr/tuo_common_setup/esmi_install" CACHE PATH "") + +# Path to the HSMP install; Setuphsmp.cmake uses this directory as HSMP_INCLUDE_DIRS +set(HSMP_DIR "/usr/workspace/msr/brink2-2025-amd-hsmp-tuo/amd_hsmp" CACHE PATH "") diff --git a/host-configs/tuolumne-4.18.0toss.t4.x86_64-gcc@13.3.1-rocm@7.0.1-both.cmake b/host-configs/tuolumne-4.18.0toss.t4.x86_64-gcc@13.3.1-rocm@7.0.1-both.cmake new file mode 100644 index 000000000..09ee4b2e6 --- /dev/null +++ b/host-configs/tuolumne-4.18.0toss.t4.x86_64-gcc@13.3.1-rocm@7.0.1-both.cmake @@ -0,0 +1,35 @@ +# Copyright 2019-2023 Lawrence Livermore National Security, LLC and other +# Variorum Project Developers. See the top-level LICENSE file for details. 
+# +# SPDX-License-Identifier: MIT + +# C compiler (GCC 13.3.1 under /usr/tce/packages) +set(CMAKE_C_COMPILER "/usr/tce/packages/gcc/gcc-13.3.1/bin/gcc" CACHE PATH "") + +# C++ compiler (GCC 13.3.1 under /usr/tce/packages) +set(CMAKE_CXX_COMPILER "/usr/tce/packages/gcc/gcc-13.3.1/bin/g++" CACHE PATH "") + +# Fortran compiler (GCC 13.3.1 under /usr/tce/packages) +set(CMAKE_Fortran_COMPILER "/usr/tce/packages/gcc/gcc-13.3.1/bin/gfortran" CACHE PATH "") + +set(ENABLE_MPI OFF CACHE BOOL "") +set(ENABLE_OPENMP OFF CACHE BOOL "") + +set(BUILD_TESTS OFF CACHE BOOL "") + +set(VARIORUM_DEBUG OFF CACHE BOOL "") + +set(VARIORUM_WITH_AMD_CPU ON CACHE BOOL "") +set(VARIORUM_WITH_AMD_GPU ON CACHE BOOL "") +set(VARIORUM_WITH_ARM_CPU OFF CACHE BOOL "") +set(VARIORUM_WITH_IBM_CPU OFF CACHE BOOL "") +set(VARIORUM_WITH_INTEL_CPU OFF CACHE BOOL "") +set(VARIORUM_WITH_INTEL_GPU OFF CACHE BOOL "") +set(VARIORUM_WITH_NVIDIA_GPU OFF CACHE BOOL "") + +# Path to the e_smi_library install +set(ESMI_DIR "/usr/workspace/msr/tuo_common_setup/esmi_install" CACHE PATH "") + +# ROCm 7.0.1 setup: linker flags for rocm_smi64 (a flag string, hence STRING) and the ROCm install path +set(CMAKE_SHARED_LINKER_FLAGS "-L/opt/rocm-7.0.1/lib -lrocm_smi64" CACHE STRING "") +set(ROCM_DIR "/opt/rocm-7.0.1/" CACHE PATH "") diff --git a/host-configs/tuolumne-4.18.0toss.t4.x86_64-gcc@13.3.1.cmake b/host-configs/tuolumne-4.18.0toss.t4.x86_64-gcc@13.3.1.cmake new file mode 100644 index 000000000..028616922 --- /dev/null +++ b/host-configs/tuolumne-4.18.0toss.t4.x86_64-gcc@13.3.1.cmake @@ -0,0 +1,31 @@ +# Copyright 2019-2023 Lawrence Livermore National Security, LLC and other +# Variorum Project Developers. See the top-level LICENSE file for details. 
+# +# SPDX-License-Identifier: MIT + +# C compiler (GCC 13.3.1 under /usr/tce/packages) +set(CMAKE_C_COMPILER "/usr/tce/packages/gcc/gcc-13.3.1/bin/gcc" CACHE PATH "") + +# C++ compiler (GCC 13.3.1 under /usr/tce/packages) +set(CMAKE_CXX_COMPILER "/usr/tce/packages/gcc/gcc-13.3.1/bin/g++" CACHE PATH "") + +# Fortran compiler (GCC 13.3.1 under /usr/tce/packages) +set(CMAKE_Fortran_COMPILER "/usr/tce/packages/gcc/gcc-13.3.1/bin/gfortran" CACHE PATH "") + +set(ENABLE_MPI OFF CACHE BOOL "") +set(ENABLE_OPENMP OFF CACHE BOOL "") + +set(BUILD_TESTS OFF CACHE BOOL "") + +set(VARIORUM_DEBUG OFF CACHE BOOL "") + +set(VARIORUM_WITH_AMD_CPU ON CACHE BOOL "") +set(VARIORUM_WITH_AMD_GPU OFF CACHE BOOL "") +set(VARIORUM_WITH_ARM_CPU OFF CACHE BOOL "") +set(VARIORUM_WITH_IBM_CPU OFF CACHE BOOL "") +set(VARIORUM_WITH_INTEL_CPU OFF CACHE BOOL "") +set(VARIORUM_WITH_INTEL_GPU OFF CACHE BOOL "") +set(VARIORUM_WITH_NVIDIA_GPU OFF CACHE BOOL "") + +# Path to the e_smi_library install +set(ESMI_DIR "/usr/workspace/msr/tuo_common_setup/esmi_install" CACHE PATH "") diff --git a/src/CMake/Setup3rdParty.cmake b/src/CMake/Setup3rdParty.cmake index 07639e594..dac93866b 100644 --- a/src/CMake/Setup3rdParty.cmake +++ b/src/CMake/Setup3rdParty.cmake @@ -6,7 +6,8 @@ include(CMake/thirdparty/SetupHwloc.cmake) include(CMake/thirdparty/SetupJansson.cmake) if(VARIORUM_WITH_AMD_CPU) -include(CMake/thirdparty/Setupesmi.cmake) + include(CMake/thirdparty/Setupesmi.cmake) + include(CMake/thirdparty/Setuphsmp.cmake) endif() if(VARIORUM_WITH_NVIDIA_GPU) include(CMake/thirdparty/FindNVML.cmake) diff --git a/src/CMake/thirdparty/Setuphsmp.cmake b/src/CMake/thirdparty/Setuphsmp.cmake new file mode 100644 index 000000000..8a9bedde1 --- /dev/null +++ b/src/CMake/thirdparty/Setuphsmp.cmake @@ -0,0 +1,29 @@ +# First check for user-specified HSMP_DIR; it is used as-is as the include directory +if(HSMP_DIR) + MESSAGE(STATUS "Looking for HSMP using HSMP_DIR = ${HSMP_DIR}") + + set(HSMP_FOUND TRUE) + set(HSMP_INCLUDE_DIRS ${HSMP_DIR}) + + set(HSMP_DIR ${HSMP_DIR} CACHE PATH "" FORCE) + + message(STATUS "FOUND HSMP at ${HSMP_DIR}") + message(STATUS " [*] HSMP_INCLUDE_DIRS = 
${HSMP_INCLUDE_DIRS}") +# If HSMP_DIR not specified, then try to automatically find the HSMP header +# (amd_hsmp.h) with find_path(); no library is searched for +else() + find_path(HSMP_INCLUDE_DIRS + NAMES amd_hsmp.h + ) + + if(HSMP_INCLUDE_DIRS) + set(HSMP_FOUND TRUE) + message(STATUS "FOUND HSMP using find_path()") + message(STATUS " [*] HSMP_INCLUDE_DIRS = ${HSMP_INCLUDE_DIRS}") + endif() +endif() + +# Abort if all methods fail +if(NOT HSMP_FOUND) + MESSAGE(FATAL_ERROR "HSMP support needed") +endif() diff --git a/src/variorum/AMD/config_amd.c b/src/variorum/AMD/config_amd.c index 8326d9c86..cc41ad7ec 100644 --- a/src/variorum/AMD/config_amd.c +++ b/src/variorum/AMD/config_amd.c @@ -49,6 +49,7 @@ int set_amd_func_ptrs(int idx) { case 0x0 ... 0xF: case 0x30 ... 0x3F: + case 0x90: break; default: return VARIORUM_ERROR_UNSUPPORTED_PLATFORM; diff --git a/src/variorum/AMD/epyc.c b/src/variorum/AMD/epyc.c index 9c8bcce9d..eec64c5d4 100644 --- a/src/variorum/AMD/epyc.c +++ b/src/variorum/AMD/epyc.c @@ -31,6 +31,7 @@ int amd_cpu_epyc_get_power(int long_ver) int i, ret; uint32_t current_power; + uint32_t ccd_idx, ccd_power; static int initial = 0; static struct timeval start; @@ -63,7 +64,9 @@ int amd_cpu_epyc_get_power(int long_ver) gettimeofday(&now, NULL); current_power = 0; + ccd_power = 0; ret = esmi_socket_power_get(i, &current_power); + //ret = esmi_read_ccd_power(i, &current_power); if (ret != 0) { fprintf(stdout, "Failed to get socket[%d] _POWER, " @@ -103,6 +106,25 @@ int amd_cpu_epyc_get_power(int long_ver) #endif } } + /* Dedicated index for the CCD reads: i is the socket index used by the surrounding code. */ +#ifdef VARIORUM_WITH_AMD_CPU + for (ccd_idx = 0; ccd_idx < (uint32_t)g_platform[P_AMD_CPU_IDX].total_cores; ccd_idx++) +#endif + { + ret = esmi_read_ccd_power(ccd_idx, &ccd_power); + if (ret != 0) + { + fprintf(stdout, "Failed to get core[%u] _CCDPOWER, Err[%d]:%s\n", + ccd_idx, ret, esmi_get_err_msg(ret)); + continue; + } + else + { + fprintf(stdout, "RRR _AMDPOWER %s %u %f %lf\n", + hostname, ccd_idx, (double)ccd_power / 1000, + now.tv_sec - start.tv_sec + (now.tv_usec - 
start.tv_usec) / 1000000.0); + } + } } #ifdef LIBJUSTIFY_FOUND cflush() @@ -151,6 +173,7 @@ int amd_cpu_epyc_get_power_limits(int long_ver) pcap_current = 0; pcap_max = 0; ret = esmi_socket_power_get(i, &power); + //ret = esmi_read_ccd_power(i, &power); if (ret != 0) { fprintf(stdout, "Failed to get socket[%d] _POWER, Err[%d]:%s\n", @@ -682,6 +705,7 @@ int amd_cpu_epyc_get_power_json(json_t *get_power_obj) /* AMD authors declared this as uint32_t and typecast it to double, * not sure why. Just following their lead from the get_power function*/ uint32_t current_power; + uint32_t ccd_idx, ccd_power; double node_power = 0.0; int i, ret = 0; int sockID_len = 32; @@ -696,7 +720,9 @@ int amd_cpu_epyc_get_power_json(json_t *get_power_obj) json_object_set_new(get_power_obj, sockID, socket_obj); current_power = 0; + ccd_power = 0; ret = esmi_socket_power_get(i, &current_power); + //ret = esmi_read_ccd_power(i, &current_power); if (ret != 0) { fprintf(stdout, "Failed to get socket[%d] _POWER, " @@ -714,6 +740,23 @@ int amd_cpu_epyc_get_power_json(json_t *get_power_obj) json_object_set_new(socket_obj, "power_mem_watts", json_real(-1.0)); node_power += ((double)current_power / 1000); + /* Dedicated index for the CCD reads: i is the socket index used by the surrounding code. */ +#ifdef VARIORUM_WITH_AMD_CPU + for (ccd_idx = 0; ccd_idx < (uint32_t)g_platform[P_AMD_CPU_IDX].total_cores; ccd_idx++) +#endif + { + ret = esmi_read_ccd_power(ccd_idx, &ccd_power); + if (ret != 0) + { + fprintf(stdout, "Failed to get core[%u] _CCDPOWER, Err[%d]:%s\n", + ccd_idx, ret, esmi_get_err_msg(ret)); + continue; + } + else + { + fprintf(stdout, "RRR _AMDPOWER %u %f\n", ccd_idx, (double)ccd_power / 1000); + } + } } // Set the node power key with pwrnode value. diff --git a/src/variorum/CMakeLists.txt b/src/variorum/CMakeLists.txt index e696d5a48..7364f09d1 100644 --- a/src/variorum/CMakeLists.txt +++ b/src/variorum/CMakeLists.txt @@ -36,6 +36,7 @@ set(variorum_includes ${CMAKE_CURRENT_SOURCE_DIR} ${HWLOC_INCLUDE_DIRS} ${ESMI_INCLUDE_DIRS} + ${HSMP_INCLUDE_DIRS} ) if(VARIORUM_WITH_INTEL_CPU)