diff --git a/CMakeLists.txt b/CMakeLists.txt index f3d605a810..7cb22061aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ # Copyright (c) GFDL, @underwoo -cmake_minimum_required(VERSION 3.12 FATAL_ERROR) +cmake_minimum_required(VERSION 3.22 FATAL_ERROR) # Define the CMake project project(FMS @@ -26,7 +26,7 @@ project(FMS DESCRIPTION "GFDL FMS Library" HOMEPAGE_URL "https://www.gfdl.noaa.gov/fms" LANGUAGES C Fortran) - +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) include(GNUInstallDirs) if(NOT CMAKE_BUILD_TYPE MATCHES "^(Debug|Release|RelWithDebInfo|MinSizeRel|DebugUFS|ReleaseUFS)$") @@ -65,6 +65,11 @@ option(PORTABLE_KINDS "Enable compiler definition -DPORTABLE_KINDS" option(GFS_PHYS "Enable compiler definition -DGFS_PHYS" OFF) option(WITH_YAML "Enable compiler definition -Duse_yaml" OFF) +if(CMAKE_Fortran_COMPILER_ID MATCHES "NVHPC") + set(ENABLE_QUAD_PRECISION OFF) + message(WARNING "NVHPC does not support building FMS with QUAD_PRECISION!") +endif() + if(32BIT) list(APPEND kinds "r4") message(STATUS "Building library with 4-byte real defaults (with mixed precision real support for most modules).") @@ -329,6 +334,7 @@ foreach(kind ${kinds}) mosaic2/include constants astronomy/include + field_manager/ field_manager/include time_interp/include tracer_manager/include diff --git a/cmake/compiler_flags_NVHPC_C.cmake b/cmake/compiler_flags_NVHPC_C.cmake new file mode 100644 index 0000000000..fe6ac05f9f --- /dev/null +++ b/cmake/compiler_flags_NVHPC_C.cmake @@ -0,0 +1,6 @@ +# NVHPC C +set(CMAKE_C_FLAGS_RELEASE "-O3") + +set(CMAKE_C_FLAGS_DEBUG "-O0 -g") + +set(CMAKE_C_LINK_FLAGS "") diff --git a/cmake/compiler_flags_NVHPC_Fortran.cmake b/cmake/compiler_flags_NVHPC_Fortran.cmake new file mode 100644 index 0000000000..954e82f498 --- /dev/null +++ b/cmake/compiler_flags_NVHPC_Fortran.cmake @@ -0,0 +1,13 @@ +# Precision-based Fortran compiler flags +set(r8_flags "-r8") # Fortran flags for 64BIT precision + +# NVHPC Fortran +set(CMAKE_Fortran_FLAGS 
"${CMAKE_Fortran_FLAGS} ") + +set(CMAKE_Fortran_FLAGS_RELEASE "-O3") + +set(CMAKE_Fortran_FLAGS_DEBUG "-O0" ) +# -g is left out of the debug flags because it can cause bugs, see: https://forums.developer.nvidia.com/t/bug-compiling-with-g-o0-produces-a-compute-sanitizer-error-removing-g-removes-the-error/341478 +# NOTE(review): unclear whether this only affects GPU builds -- confirm + +set(CMAKE_Fortran_LINK_FLAGS "" ) diff --git a/cmake/fms_compiler_flags.cmake b/cmake/fms_compiler_flags.cmake index cc7a2fee0f..be84877342 100644 --- a/cmake/fms_compiler_flags.cmake +++ b/cmake/fms_compiler_flags.cmake @@ -8,6 +8,8 @@ elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Intel") include(compiler_flags_Intel_Fortran) elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "IntelLLVM") include(compiler_flags_IntelLLVM_Fortran) +elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") + include(compiler_flags_NVHPC_Fortran) else() message(WARNING "Fortran compiler with ID ${CMAKE_Fortran_COMPILER_ID} will be used with CMake default options") endif() @@ -24,6 +26,8 @@ elseif(CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM") include(compiler_flags_IntelLLVM_C) elseif(CMAKE_C_COMPILER_ID MATCHES "Clang") include(compiler_flags_Clang_C) +elseif(CMAKE_C_COMPILER_ID MATCHES "NVHPC") + include(compiler_flags_NVHPC_C) else() message(WARNING "C compiler with ID ${CMAKE_C_COMPILER_ID} will be used with CMake default options") endif() diff --git a/exchange/xgrid.F90 b/exchange/xgrid.F90 index 947c30a530..127cafb125 100644 --- a/exchange/xgrid.F90 +++ b/exchange/xgrid.F90 @@ -4023,9 +4023,12 @@ subroutine get_1_from_xgrid_repro(d_addrs, x_addrs, xmap, xsize, lsize) real(r8_kind), pointer :: x(:) real(r8_kind), pointer, contiguous :: tmpptr(:,:) integer :: shape_d(2) + integer :: i_off, j_off call mpp_clock_begin(id_get_1_from_xgrid_repro) shape_d = [xmap%grids(1)%ie_me-xmap%grids(1)%is_me+1, xmap%grids(1)%je_me-xmap%grids(1)%js_me+1] + i_off = xmap%grids(1)%is_me - 1 + j_off = xmap%grids(1)%js_me - 1 comm => xmap%get1_repro !--- pre-post receiving do p = 1, comm%nrecv @@ -4071,17 +4074,17 @@ 
subroutine get_1_from_xgrid_repro(d_addrs, x_addrs, xmap, xsize, lsize) do l = 1, lsize call c_f_pointer(d_addrs(l), tmpptr, shape=shape_d) - d(xmap%grids(1)%is_me:xmap%grids(1)%ie_me, xmap%grids(1)%js_me:xmap%grids(1)%je_me) => tmpptr + d => tmpptr d = 0 enddo call mpp_sync_self(check=EVENT_RECV) -!$OMP parallel do default(none) shared(lsize,shape_d,d_addrs,xmap,recv_buffer,pl,ml) & +!$OMP parallel do default(none) shared(lsize,shape_d,d_addrs,xmap,recv_buffer,pl,ml,i_off,j_off) & !$OMP private(d,tmpptr,grid,i,j,p,pos) do l = 1, lsize call c_f_pointer(d_addrs(l), tmpptr, shape=shape_d) - d(xmap%grids(1)%is_me:xmap%grids(1)%ie_me, xmap%grids(1)%js_me:xmap%grids(1)%je_me) => tmpptr + d => tmpptr do g=2,size(xmap%grids(:)) grid => xmap%grids(g) do l3=1,grid%size_repro ! index into side1 grid's patterns @@ -4089,7 +4092,7 @@ subroutine get_1_from_xgrid_repro(d_addrs, x_addrs, xmap, xsize, lsize) j = grid%x_repro(l3)%j1 p = grid%x_repro(l3)%pe-xmap%root_pe pos = pl(p) + (l-1)*ml(p) + grid%x_repro(l3)%recv_pos - d(i,j) = d(i,j) + recv_buffer(pos) + d(i - i_off, j - j_off) = d(i - i_off, j - j_off) + recv_buffer(pos) end do end do ! normalize with side 1 grid cell areas