
Commit 9ef3687

Merge branch 'feat/55474' of https://github.com/a-tarasyuk/llvm-project into feat/55474

2 parents 9463177 + ba6d791

150 files changed (+12825 -11188 lines)


clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.cpp (+3 -1)

@@ -513,7 +513,9 @@ void NarrowingConversionsCheck::handleFloatingCast(const ASTContext &Context,
     return;
   }
   const BuiltinType *FromType = getBuiltinType(Rhs);
-  if (ToType->getKind() < FromType->getKind())
+  if (!llvm::APFloatBase::isRepresentableBy(
+          Context.getFloatTypeSemantics(FromType->desugar()),
+          Context.getFloatTypeSemantics(ToType->desugar())))
     diagNarrowType(SourceLoc, Lhs, Rhs);
   }
 }
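Why the old comparison was wrong: `BuiltinType` kinds are ordered by their declaration order in `BuiltinTypes.def`, not by representable range, so once a type such as `_Float16` is involved, `ToType->getKind() < FromType->getKind()` no longer tracks which type is narrower. The new code asks the semantic question directly. A minimal sketch of that query (not part of the commit; assumes LLVM headers and the `APFloatBase::isRepresentableBy` helper this change relies on):

    #include "llvm/ADT/APFloat.h"
    #include <cstdio>

    int main() {
      // fltSemantics describe a format's precision and exponent range.
      const llvm::fltSemantics &F16 = llvm::APFloat::IEEEhalf();   // _Float16
      const llvm::fltSemantics &F32 = llvm::APFloat::IEEEsingle(); // float

      // float -> _Float16 loses range and precision, so this prints 0.
      std::printf("%d\n", llvm::APFloatBase::isRepresentableBy(F32, F16));
      // _Float16 -> float is exact for every value, so this prints 1.
      std::printf("%d\n", llvm::APFloatBase::isRepresentableBy(F16, F32));
      return 0;
    }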

clang-tools-extra/docs/ReleaseNotes.rst (+5)

@@ -210,6 +210,11 @@ Changes in existing checks
   <clang-tidy/checks/bugprone/forwarding-reference-overload>` check by fixing
   a crash when determining if an ``enable_if[_t]`` was found.
 
+- Improved :doc:`bugprone-narrowing-conversions
+  <clang-tidy/checks/bugprone/narrowing-conversions>` check to avoid incorrect
+  results when the floating-point type is not ``float``, ``double``, or
+  ``long double``.
+
 - Improved :doc:`bugprone-optional-value-conversion
   <clang-tidy/checks/bugprone/optional-value-conversion>` to support detecting
   conversion directly by ``std::make_unique`` and ``std::make_shared``.

clang-tools-extra/test/clang-tidy/checkers/bugprone/narrowing-conversions-narrowingfloatingpoint-option.cpp (+9)

@@ -36,6 +36,15 @@ void narrow_double_to_float_not_ok(double d) {
   f = narrow_double_to_float_return();
 }
 
+float narrow_float16_to_float_return(_Float16 f) {
+  return f;
+}
+
+_Float16 narrow_float_to_float16_return(float f) {
+  return f;
+  // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: narrowing conversion from 'float' to '_Float16' [bugprone-narrowing-conversions]
+}
+
 void narrow_fp_constants() {
   float f;
   f = 0.5; // [dcl.init.list] 7.2 : in-range fp constant to narrower float is not a narrowing.

clang/include/clang/Driver/Options.td (+1 -1)

@@ -4364,7 +4364,7 @@ defm split_machine_functions: BoolFOption<"split-machine-functions",
   CodeGenOpts<"SplitMachineFunctions">, DefaultFalse,
   PosFlag<SetTrue, [], [ClangOption, CC1Option], "Enable">,
   NegFlag<SetFalse, [], [ClangOption], "Disable">,
-  BothFlags<[], [ClangOption], " late function splitting using profile information (x86 ELF)">>;
+  BothFlags<[], [ClangOption], " late function splitting using profile information (x86 and aarch64 ELF)">>;
 
 defm strict_return : BoolFOption<"strict-return",
   CodeGenOpts<"StrictReturn">, DefaultTrue,

clang/lib/Analysis/UnsafeBufferUsage.cpp (+5 -2)

@@ -453,8 +453,11 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
     return false;
   }
 
-  if (const auto *IdxLit = dyn_cast<IntegerLiteral>(Node.getIdx())) {
-    const APInt ArrIdx = IdxLit->getValue();
+  Expr::EvalResult EVResult;
+  if (Node.getIdx()->EvaluateAsInt(EVResult, Finder->getASTContext())) {
+    llvm::APSInt ArrIdx = EVResult.Val.getInt();
+    // FIXME: If ArrIdx.isNegative() we could immediately emit an error as
+    // that's a bug.
     if (ArrIdx.isNonNegative() && ArrIdx.getLimitedValue() < limit)
       return true;
   }
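The change from matching a bare `IntegerLiteral` to calling `Expr::EvaluateAsInt` means the matcher now accepts any index the compiler can fold to a constant, such as `sizeof(int)`, `2 << index` with `index` a `const int`, or a `constexpr` function call, as the new test below exercises. A small standalone sketch of the idiom (the helper name and setup are illustrative, not from the commit):

    #include "clang/AST/ASTContext.h"
    #include "clang/AST/Expr.h"
    #include <cstdint>

    // Hypothetical helper: true only when Idx constant-folds to a value in
    // [0, Limit), mirroring the bounds test in isSafeArraySubscript above.
    static bool indexKnownToBeBelow(const clang::Expr *Idx,
                                    const clang::ASTContext &Ctx,
                                    uint64_t Limit) {
      clang::Expr::EvalResult Result;
      // Folds literals, sizeof, shifts of constants, constexpr calls, etc.
      if (!Idx->EvaluateAsInt(Result, Ctx))
        return false; // Not a compile-time constant: safety cannot be proven.
      llvm::APSInt Value = Result.Val.getInt();
      return Value.isNonNegative() && Value.getLimitedValue() < Limit;
    }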

clang/lib/CodeGen/CGCall.cpp (+1 -1)

@@ -4507,7 +4507,7 @@ void CodeGenFunction::EmitCallArgs(
   // First, if a prototype was provided, use those argument types.
   bool IsVariadic = false;
   if (Prototype.P) {
-    const auto *MD = Prototype.P.dyn_cast<const ObjCMethodDecl *>();
+    const auto *MD = dyn_cast<const ObjCMethodDecl *>(Prototype.P);
     if (MD) {
       IsVariadic = MD->isVariadic();
       ExplicitCC = getCallingConventionForDecl(
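This is a mechanical migration from `PointerUnion`'s member `dyn_cast` to the free-function form LLVM has been standardizing on. A self-contained sketch of the pattern (the two decl types are illustrative stand-ins, not clang's):

    #include "llvm/ADT/PointerUnion.h"

    struct FnDecl { bool Variadic; };
    struct MethodDecl { bool Variadic; };

    static bool isVariadicCallee(llvm::PointerUnion<FnDecl *, MethodDecl *> P) {
      if (P.isNull())
        return false;
      // Free-function dyn_cast yields null when P holds the other alternative.
      if (const auto *MD = llvm::dyn_cast<MethodDecl *>(P))
        return MD->Variadic;
      return llvm::cast<FnDecl *>(P)->Variadic;
    }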

clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp (+32)

@@ -92,3 +92,35 @@ char access_strings() {
   c = array_string[5];
   return c;
 }
+
+struct T {
+  int array[10];
+};
+
+const int index = 1;
+
+constexpr int get_const(int x) {
+  if(x < 3)
+    return ++x;
+  else
+    return x + 5;
+};
+
+void array_indexed_const_expr(unsigned idx) {
+  // expected-note@+2 {{change type of 'arr' to 'std::array' to label it for hardening}}
+  // expected-warning@+1{{'arr' is an unsafe buffer that does not perform bounds checks}}
+  int arr[10];
+  arr[sizeof(int)] = 5;
+
+  int array[sizeof(T)];
+  array[sizeof(int)] = 5;
+  array[sizeof(T) -1 ] = 3;
+
+  int k = arr[6 & 5];
+  k = arr[2 << index];
+  k = arr[8 << index]; // expected-note {{used in buffer access here}}
+  k = arr[16 >> 1];
+  k = arr[get_const(index)];
+  k = arr[get_const(5)]; // expected-note {{used in buffer access here}}
+  k = arr[get_const(4)];
+}

compiler-rt/lib/scudo/standalone/primary64.h (+16 -12)

@@ -1141,18 +1141,18 @@ template <typename Config> class SizeClassAllocator64 {
         BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
   }
   const uptr TotalChunks = Region->MemMapInfo.AllocatedUser / BlockSize;
-  Str->append(
-      "%s %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu "
-      "inuse: %6zu total: %6zu releases: %6zu last "
-      "releases attempted: %6zuK latest pushed bytes: %6zuK region: 0x%zx "
-      "(0x%zx)\n",
-      Region->Exhausted ? "E" : " ", ClassId, getSizeByClassId(ClassId),
-      Region->MemMapInfo.MappedUser >> 10, Region->FreeListInfo.PoppedBlocks,
-      Region->FreeListInfo.PushedBlocks, InUseBlocks, TotalChunks,
-      Region->ReleaseInfo.NumReleasesAttempted,
-      Region->ReleaseInfo.LastReleasedBytes >> 10,
-      RegionPushedBytesDelta >> 10, Region->RegionBeg,
-      getRegionBaseByClassId(ClassId));
+  Str->append("%s %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu "
+              "inuse: %6zu total: %6zu releases attempted: %6zu last "
+              "released: %6zuK latest pushed bytes: %6zuK region: 0x%zx "
+              "(0x%zx)\n",
+              Region->Exhausted ? "E" : " ", ClassId,
+              getSizeByClassId(ClassId), Region->MemMapInfo.MappedUser >> 10,
+              Region->FreeListInfo.PoppedBlocks,
+              Region->FreeListInfo.PushedBlocks, InUseBlocks, TotalChunks,
+              Region->ReleaseInfo.NumReleasesAttempted,
+              Region->ReleaseInfo.LastReleasedBytes >> 10,
+              RegionPushedBytesDelta >> 10, Region->RegionBeg,
+              getRegionBaseByClassId(ClassId));
 }
 
 void getRegionFragmentationInfo(RegionInfo *Region, uptr ClassId,

@@ -1297,6 +1297,10 @@ template <typename Config> class SizeClassAllocator64 {
     return 0;
   }
 
+  // The following steps contribute to the majority of the time spent in
+  // page releasing, thus we increment the counter here.
+  ++Region->ReleaseInfo.NumReleasesAttempted;
+
   // Note that we have extracted the `GroupsToRelease` from region freelist.
   // It's safe to let pushBlocks()/popBlocks() access the remaining region
   // freelist. In the steps 3 and 4, we will temporarily release the FLLock

flang/lib/Optimizer/Transforms/CUFOpConversion.cpp (+44 -21)

@@ -366,6 +366,23 @@ struct CUFAllocOpConversion : public mlir::OpRewritePattern<cuf::AllocOp> {
   const fir::LLVMTypeConverter *typeConverter;
 };
 
+static mlir::Value genGetDeviceAddress(mlir::PatternRewriter &rewriter,
+                                       mlir::ModuleOp mod, mlir::Location loc,
+                                       mlir::Value inputArg) {
+  fir::FirOpBuilder builder(rewriter, mod);
+  mlir::func::FuncOp callee =
+      fir::runtime::getRuntimeFunc<mkRTKey(CUFGetDeviceAddress)>(loc, builder);
+  auto fTy = callee.getFunctionType();
+  mlir::Value conv = createConvertOp(rewriter, loc, fTy.getInput(0), inputArg);
+  mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
+  mlir::Value sourceLine =
+      fir::factory::locationToLineNo(builder, loc, fTy.getInput(2));
+  llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
+      builder, loc, fTy, conv, sourceFile, sourceLine)};
+  auto call = rewriter.create<fir::CallOp>(loc, callee, args);
+  return createConvertOp(rewriter, loc, inputArg.getType(), call->getResult(0));
+}
+
 struct DeclareOpConversion : public mlir::OpRewritePattern<fir::DeclareOp> {
   using OpRewritePattern::OpRewritePattern;
 
@@ -382,26 +399,10 @@ struct DeclareOpConversion : public mlir::OpRewritePattern<fir::DeclareOp> {
     if (cuf::isRegisteredDeviceGlobal(global)) {
       rewriter.setInsertionPointAfter(addrOfOp);
       auto mod = op->getParentOfType<mlir::ModuleOp>();
-      fir::FirOpBuilder builder(rewriter, mod);
-      mlir::Location loc = op.getLoc();
-      mlir::func::FuncOp callee =
-          fir::runtime::getRuntimeFunc<mkRTKey(CUFGetDeviceAddress)>(
-              loc, builder);
-      auto fTy = callee.getFunctionType();
-      mlir::Type toTy = fTy.getInput(0);
-      mlir::Value inputArg =
-          createConvertOp(rewriter, loc, toTy, addrOfOp.getResult());
-      mlir::Value sourceFile =
-          fir::factory::locationToFilename(builder, loc);
-      mlir::Value sourceLine =
-          fir::factory::locationToLineNo(builder, loc, fTy.getInput(2));
-      llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
-          builder, loc, fTy, inputArg, sourceFile, sourceLine)};
-      auto call = rewriter.create<fir::CallOp>(loc, callee, args);
-      mlir::Value cast = createConvertOp(
-          rewriter, loc, op.getMemref().getType(), call->getResult(0));
+      mlir::Value devAddr = genGetDeviceAddress(rewriter, mod, op.getLoc(),
+                                                addrOfOp.getResult());
       rewriter.startOpModification(op);
-      op.getMemrefMutable().assign(cast);
+      op.getMemrefMutable().assign(devAddr);
       rewriter.finalizeOpModification(op);
       return success();
     }
@@ -771,10 +772,32 @@
             loc, clusterDimsAttr.getZ().getInt());
       }
     }
+    llvm::SmallVector<mlir::Value> args;
+    auto mod = op->getParentOfType<mlir::ModuleOp>();
+    for (mlir::Value arg : op.getArgs()) {
+      // If the argument is a global descriptor, make sure we pass the device
+      // copy of this descriptor and not the host one.
+      if (mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(arg.getType()))) {
+        if (auto declareOp =
+                mlir::dyn_cast_or_null<fir::DeclareOp>(arg.getDefiningOp())) {
+          if (auto addrOfOp = mlir::dyn_cast_or_null<fir::AddrOfOp>(
+                  declareOp.getMemref().getDefiningOp())) {
+            if (auto global = symTab.lookup<fir::GlobalOp>(
+                    addrOfOp.getSymbol().getRootReference().getValue())) {
+              if (cuf::isRegisteredDeviceGlobal(global)) {
+                arg = genGetDeviceAddress(rewriter, mod, op.getLoc(),
+                                          declareOp.getResult());
+              }
+            }
+          }
+        }
+      }
+      args.push_back(arg);
+    }
+
     auto gpuLaunchOp = rewriter.create<mlir::gpu::LaunchFuncOp>(
         loc, kernelName, mlir::gpu::KernelDim3{gridSizeX, gridSizeY, gridSizeZ},
-        mlir::gpu::KernelDim3{blockSizeX, blockSizeY, blockSizeZ}, zero,
-        op.getArgs());
+        mlir::gpu::KernelDim3{blockSizeX, blockSizeY, blockSizeZ}, zero, args);
     if (clusterDimX && clusterDimY && clusterDimZ) {
       gpuLaunchOp.getClusterSizeXMutable().assign(clusterDimX);
       gpuLaunchOp.getClusterSizeYMutable().assign(clusterDimY);

flang/lib/Semantics/resolve-names.cpp (+3 -1)

@@ -4015,7 +4015,9 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) {
       *attrs == common::CUDASubprogramAttrs::Device) {
     const Scope &scope{currScope()};
     const Scope *mod{FindModuleContaining(scope)};
-    if (mod && mod->GetName().value() == "cudadevice") {
+    if (mod &&
+        (mod->GetName().value() == "cudadevice" ||
+         mod->GetName().value() == "__cuda_device")) {
       return false;
     }
     // Implicitly USE the cudadevice module by copying its symbols in the

flang/module/__cuda_device.f90 (+32)

@@ -0,0 +1,32 @@
+!===-- module/__cuda_device.f90 --------------------------------------------===!
+!
+! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+! See https://llvm.org/LICENSE.txt for license information.
+! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+!
+!===------------------------------------------------------------------------===!
+
+! This module contains CUDA Fortran interfaces used in cudadevice.f90.
+
+module __cuda_device
+implicit none
+
+! Set PRIVATE by default to explicitly only export what is meant
+! to be exported by this MODULE.
+
+  interface
+    attributes(device) function __fadd_rd(x, y) bind(c, name='__nv_fadd_rd')
+      real, intent(in), value :: x, y
+      real :: __fadd_rd
+    end function
+  end interface
+  public :: __fadd_rd
+
+  interface
+    attributes(device) function __fadd_ru(x, y) bind(c, name='__nv_fadd_ru')
+      real, intent(in), value :: x, y
+      real :: __fadd_ru
+    end function
+  end interface
+  public :: __fadd_ru
+end module

flang/module/cudadevice.f90 (+1 -16)

@@ -9,6 +9,7 @@
 ! CUDA Fortran procedures available in device subprogram
 
 module cudadevice
+use __cuda_device, only: __fadd_rd, __fadd_ru
 implicit none
 
 ! Set PRIVATE by default to explicitly only export what is meant

@@ -71,20 +72,4 @@ attributes(device) subroutine threadfence_system()
   end interface
   public :: threadfence_system
 
-  interface
-    attributes(device) function __fadd_rd(x, y) bind(c, name='__nv_fadd_rd')
-      real, intent(in) :: x, y
-      real :: __fadd_rd
-    end function
-  end interface
-  public :: __fadd_rd
-
-  interface
-    attributes(device) function __fadd_ru(x, y) bind(c, name='__nv_fadd_ru')
-      real, intent(in) :: x, y
-      real :: __fadd_ru
-    end function
-  end interface
-  public :: __fadd_ru
-
 end module

flang/test/Fir/CUDA/cuda-launch.fir (+42)

@@ -62,3 +62,45 @@
 // CHECK-LABEL: func.func @_QMmod1Phost_sub()
 // CHECK: gpu.launch_func @cuda_device_mod::@_QMmod1Psub1 clusters in (%c2{{.*}}, %c2{{.*}}, %c1{{.*}})
 
+// -----
+
+module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} {
+  gpu.module @cuda_device_mod {
+    gpu.func @_QMdevptrPtest(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) kernel {
+      gpu.return
+    }
+  }
+  fir.global @_QMdevptrEdev_ptr {data_attr = #cuf.cuda<device>} : !fir.box<!fir.ptr<!fir.array<?xf32>>> {
+    %c0 = arith.constant 0 : index
+    %0 = fir.zero_bits !fir.ptr<!fir.array<?xf32>>
+    %1 = fir.shape %c0 : (index) -> !fir.shape<1>
+    %2 = fir.embox %0(%1) {allocator_idx = 2 : i32} : (!fir.ptr<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.ptr<!fir.array<?xf32>>>
+    fir.has_value %2 : !fir.box<!fir.ptr<!fir.array<?xf32>>>
+  }
+  func.func @_QMdevptrPtest(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "dp"}) attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
+    return
+  }
+  func.func @_QQmain() {
+    %c1_i32 = arith.constant 1 : i32
+    %c4 = arith.constant 4 : index
+    %0 = cuf.alloc !fir.array<4xf32> {bindc_name = "a_dev", data_attr = #cuf.cuda<device>, uniq_name = "_QFEa_dev"} -> !fir.ref<!fir.array<4xf32>>
+    %1 = fir.shape %c4 : (index) -> !fir.shape<1>
+    %2 = fir.declare %0(%1) {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<target>, uniq_name = "_QFEa_dev"} : (!fir.ref<!fir.array<4xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<4xf32>>
+    %3 = fir.address_of(@_QMdevptrEdev_ptr) : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+    %4 = fir.declare %3 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMdevptrEdev_ptr"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+    %5 = fir.embox %2(%1) : (!fir.ref<!fir.array<4xf32>>, !fir.shape<1>) -> !fir.box<!fir.ptr<!fir.array<?xf32>>>
+    fir.store %5 to %4 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+    cuf.sync_descriptor @_QMdevptrEdev_ptr
+    cuf.kernel_launch @_QMdevptrPtest<<<%c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32>>>(%4) : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
+    cuf.free %2 : !fir.ref<!fir.array<4xf32>> {data_attr = #cuf.cuda<device>}
+    return
+  }
+}
+
+// CHECK-LABEL: func.func @_QQmain()
+// CHECK: %[[ADDROF:.*]] = fir.address_of(@_QMdevptrEdev_ptr) : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+// CHECK: %[[DECL:.*]] = fir.declare %[[ADDROF]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMdevptrEdev_ptr"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+// CHECK: %[[CONV_DECL:.*]] = fir.convert %[[DECL]] : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> !fir.llvm_ptr<i8>
+// CHECK: %[[DEVADDR:.*]] = fir.call @_FortranACUFGetDeviceAddress(%[[CONV_DECL]], %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
+// CHECK: %[[CONV_DEVADDR:.*]] = fir.convert %[[DEVADDR]] : (!fir.llvm_ptr<i8>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+// CHECK: gpu.launch_func @cuda_device_mod::@_QMdevptrPtest blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) dynamic_shared_memory_size %{{.*}} args(%[[CONV_DEVADDR]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)

flang/tools/f18/CMakeLists.txt (+5 -1)

@@ -21,6 +21,7 @@ set(MODULES_WITHOUT_IMPLEMENTATION
   "__ppc_intrinsics"
   "mma"
   "__cuda_builtins"
+  "__cuda_device"
   "cudadevice"
   "ieee_arithmetic"
   "ieee_exceptions"

@@ -67,9 +68,12 @@ if (NOT CMAKE_CROSSCOMPILING)
     elseif(${filename} STREQUAL "__ppc_intrinsics" OR
            ${filename} STREQUAL "mma")
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod)
-    elseif(${filename} STREQUAL "cudadevice")
+    elseif(${filename} STREQUAL "__cuda_device")
       set(opts -fc1 -xcuda)
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod)
+    elseif(${filename} STREQUAL "cudadevice")
+      set(opts -fc1 -xcuda)
+      set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device.mod)
     else()
       set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod)
       if(${filename} STREQUAL "iso_fortran_env")
