diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index b6b114f0e4b9..6e0296ea23d7 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -1513,17 +1513,64 @@ mlir::Value ScalarExprEmitter::emitSub(const BinOpInfo &Ops) {
 
   if (!mlir::isa<cir::PointerType>(Ops.RHS.getType()))
     return emitPointerArithmetic(CGF, Ops, /*isSubtraction=*/true);
 
-  // Otherwise, this is a pointer subtraction
-  // Do the raw subtraction part.
-  //
-  // TODO(cir): note for LLVM lowering out of this; when expanding this into
-  // LLVM we shall take VLA's, division by element size, etc.
-  //
-  // See more in `EmitSub` in CGExprScalar.cpp.
+  mlir::Value lhs = Ops.LHS;
+  mlir::Value rhs = Ops.RHS;
+
+  cir::PointerType lhsPtrTy = mlir::dyn_cast<cir::PointerType>(lhs.getType());
+  cir::PointerType rhsPtrTy = mlir::dyn_cast<cir::PointerType>(rhs.getType());
+
+  if (lhsPtrTy && rhsPtrTy) {
+    cir::AddressSpace lhsAS = lhsPtrTy.getAddrSpace();
+    cir::AddressSpace rhsAS = rhsPtrTy.getAddrSpace();
+
+    if (lhsAS != rhsAS) {
+      // Different address spaces: insert an address space cast.
+      rhs = Builder.createAddrSpaceCast(rhs, lhsPtrTy);
+    } else if (lhsPtrTy != rhsPtrTy) {
+      // Same address space but different pointee type: a bitcast is enough.
+      rhs = Builder.createBitcast(rhs, lhsPtrTy);
+    }
+  }
+
   assert(!cir::MissingFeatures::llvmLoweringPtrDiffConsidersPointee());
-  return cir::PtrDiffOp::create(Builder, CGF.getLoc(Ops.Loc), CGF.PtrDiffTy,
-                                Ops.LHS, Ops.RHS);
+  mlir::Value diff = cir::PtrDiffOp::create(Builder, CGF.getLoc(Ops.Loc),
+                                            CGF.PtrDiffTy, lhs, rhs);
+
+  const BinaryOperator *expr = cast<BinaryOperator>(Ops.E);
+  QualType elementType = expr->getLHS()->getType()->getPointeeType();
+
+  mlir::Location loc = CGF.getLoc(Ops.Loc);
+  mlir::Value divisor;
+
+  // Check if this is a VLA pointee type.
+  if (const auto *vla = CGF.getContext().getAsVariableArrayType(elementType)) {
+    auto vlaSize = CGF.getVLASize(vla);
+    elementType = vlaSize.Type;
+    divisor = vlaSize.NumElts;
+
+    CharUnits eltSize = CGF.getContext().getTypeSizeInChars(elementType);
+    if (!eltSize.isOne()) {
+      cir::IntType cirIntTy = llvm::cast<cir::IntType>(CGF.PtrDiffTy);
+      cir::IntAttr eltSizeAttr =
+          cir::IntAttr::get(cirIntTy, eltSize.getQuantity());
+
+      if (divisor.getType() != CGF.PtrDiffTy)
+        divisor = Builder.createIntCast(divisor, CGF.PtrDiffTy);
+
+      auto eltSizeVal =
+          cir::ConstantOp::create(Builder, loc, cirIntTy, eltSizeAttr)
+              .getResult();
+      divisor = Builder.createNUWAMul(eltSizeVal, divisor);
+    }
+  } else {
+    // cir.ptr_diff already computes the ABI-correct difference of the two
+    // pointers; there is nothing else to compute here, so just return it.
+    return diff;
+  }
+
+  return cir::BinOp::create(Builder, loc, CGF.PtrDiffTy, cir::BinOpKind::Div,
+                            diff, divisor);
 }
 
 mlir::Value ScalarExprEmitter::emitShl(const BinOpInfo &Ops) {
diff --git a/clang/test/CIR/CodeGen/HIP/ptr-diff.cpp b/clang/test/CIR/CodeGen/HIP/ptr-diff.cpp
new file mode 100644
index 000000000000..10cb3832b00a
--- /dev/null
+++ b/clang/test/CIR/CodeGen/HIP/ptr-diff.cpp
@@ -0,0 +1,60 @@
+#include "cuda.h"
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:   -fcuda-is-device -fhip-new-launch-api \
+// RUN:   -I%S/../Inputs/ -emit-cir %s -o %t.ll
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:   -fcuda-is-device -fhip-new-launch-api \
+// RUN:   -I%S/../Inputs/ -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
+// RUN:   -fcuda-is-device -fhip-new-launch-api \
+// RUN:   -I%S/../Inputs/ -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+__device__ int ptr_diff() {
+  const char c_str[] = "c-string";
+  const char* len = c_str;
+  return c_str - len;
+}
+
+
+// CIR-DEVICE: %[[#LenLocalAddr:]] = cir.alloca !cir.ptr, !cir.ptr>, ["len", init]
+// CIR-DEVICE: %[[#GlobalPtr:]] = cir.get_global @_ZZ8ptr_diffvE5c_str : !cir.ptr, addrspace(offload_constant)>
+// CIR-DEVICE: %[[#CastDecay:]] = cir.cast array_to_ptrdecay %[[#GlobalPtr]] : !cir.ptr, addrspace(offload_constant)>
+// CIR-DEVICE: %[[#LenLocalAddrCast:]] = cir.cast bitcast %[[#LenLocalAddr]] : !cir.ptr> -> !cir.ptr>
+// CIR-DEVICE: cir.store align(8) %[[#CastDecay]], %[[#LenLocalAddrCast]] : !cir.ptr, !cir.ptr>
+// CIR-DEVICE: %[[#CStr:]] = cir.cast array_to_ptrdecay %[[#GlobalPtr]] : !cir.ptr, addrspace(offload_constant)> -> !cir.ptr
+// CIR-DEVICE: %[[#LoadedLenAddr:]] = cir.load align(8) %[[#LenLocalAddr]] : !cir.ptr>, !cir.ptr loc(#loc7)
+// CIR-DEVICE: %[[#AddrCast:]] = cir.cast address_space %[[#LoadedLenAddr]] : !cir.ptr -> !cir.ptr
+// CIR-DEVICE: %[[#DIFF:]] = cir.ptr_diff %[[#CStr]], %[[#AddrCast]] : !cir.ptr
+
+// LLVM-DEVICE: define dso_local i32 @_Z8ptr_diffv()
+// LLVM-DEVICE: %[[#GlobalPtrAddr:]] = alloca i32, i64 1, align 4, addrspace(5)
+// LLVM-DEVICE: %[[#GlobalPtrCast:]] = addrspacecast ptr addrspace(5) %[[#GlobalPtrAddr]] to ptr
+// LLVM-DEVICE: %[[#LenLocalAddr:]] = alloca ptr, i64 1, align 8, addrspace(5)
+// LLVM-DEVICE: %[[#LenLocalAddrCast:]] = addrspacecast ptr addrspace(5) %[[#LenLocalAddr]] to ptr
+// LLVM-DEVICE: store ptr addrspace(4) @_ZZ8ptr_diffvE5c_str, ptr %[[#LenLocalAddrCast]], align 8
+// LLVM-DEVICE: %[[#LoadedAddr:]] = load ptr, ptr %[[#LenLocalAddrCast]], align 8
+// LLVM-DEVICE: %[[#CastedVal:]] = addrspacecast ptr %[[#LoadedAddr]] to ptr addrspace(4)
+// LLVM-DEVICE: %[[#IntVal:]] = ptrtoint ptr addrspace(4) %[[#CastedVal]] to i64
+// LLVM-DEVICE: %[[#SubVal:]] = sub i64 ptrtoint (ptr addrspace(4) @_ZZ8ptr_diffvE5c_str to i64), %[[#IntVal]]
+
+// OGCG-DEVICE: define dso_local noundef i32 @_Z8ptr_diffv() #0
+// OGCG-DEVICE: %[[RETVAL:.*]] = alloca i32, align 4, addrspace(5)
+// OGCG-DEVICE: %[[C_STR:.*]] = alloca [9 x i8], align 1, addrspace(5)
+// OGCG-DEVICE: %[[LEN:.*]] = alloca ptr, align 8, addrspace(5)
+// OGCG-DEVICE: %[[RETVAL_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[RETVAL]] to ptr
+// OGCG-DEVICE: %[[C_STR_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[C_STR]] to ptr
+// OGCG-DEVICE: %[[LEN_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[LEN]] to ptr
+// OGCG-DEVICE: %[[ARRAYDECAY:.*]] = getelementptr inbounds [9 x i8], ptr %[[C_STR_ASCAST]], i64 0, i64 0
+// OGCG-DEVICE: store ptr %[[ARRAYDECAY]], ptr %[[LEN_ASCAST]], align 8
+// OGCG-DEVICE: %[[ARRAYDECAY1:.*]] = getelementptr inbounds [9 x i8], ptr %[[C_STR_ASCAST]], i64 0, i64 0
+// OGCG-DEVICE: %[[LOADED:.*]] = load ptr, ptr %[[LEN_ASCAST]], align 8
+// OGCG-DEVICE: %[[LHS:.*]] = ptrtoint ptr %[[ARRAYDECAY1]] to i64
+// OGCG-DEVICE: %[[RHS:.*]] = ptrtoint ptr %[[LOADED]] to i64
+// OGCG-DEVICE: %[[SUB:.*]] = sub i64 %[[LHS]], %[[RHS]]
+// OGCG-DEVICE: %[[CONV:.*]] = trunc i64 %[[SUB]] to i32
diff --git a/clang/test/CIR/CodeGen/vla.c b/clang/test/CIR/CodeGen/vla.c
index b185ec73eb03..1e449230925d 100644
--- a/clang/test/CIR/CodeGen/vla.c
+++ b/clang/test/CIR/CodeGen/vla.c
@@ -93,3 +93,59 @@ int f9(unsigned n, char (*p)[n][n+1][6]) {
 
   return p2 - p;
 }
+
+long f10(int n) {
+  int (*p)[n];
+  int (*q)[n];
+  return q - p;
+}
+// CHECK: cir.func dso_local @f10(%arg0: !s32i
+// CHECK: %[[N_ADDR:.*]] = cir.alloca !s32i
+// CHECK: %[[RETVAL:.*]] = cir.alloca !s64i
+// CHECK: %[[P_ADDR:.*]] = cir.alloca !cir.ptr
+// CHECK: %[[Q_ADDR:.*]] = cir.alloca !cir.ptr
+
+// CHECK: %[[N:.*]] = cir.load{{.*}} %[[N_ADDR]] : !cir.ptr, !s32i
+// CHECK: %[[N_U64:.*]] = cir.cast integral %[[N]] : !s32i -> !u64i
+
+// CHECK: %[[Q:.*]] = cir.load{{.*}} %[[Q_ADDR]]
+// CHECK: %[[P:.*]] = cir.load{{.*}} %[[P_ADDR]]
+
+// CHECK: %[[DIFF:.*]] = cir.ptr_diff %[[Q]], %[[P]] : !cir.ptr -> !s64i
+
+// CHECK: %[[N_S64:.*]] = cir.cast integral %[[N_U64]] : !u64i -> !s64i
+// CHECK: %[[ELTSIZE:.*]] = cir.const #cir.int<4> : !s64i
+// CHECK: %[[DIVISOR:.*]] = cir.binop(mul, %[[ELTSIZE]], %[[N_S64]]) nuw : !s64i
+
+// CHECK: %[[RESULT:.*]] = cir.binop(div, %[[DIFF]], %[[DIVISOR]]) : !s64i
+
+
+long f11(int n, int m) {
+  int (*p)[n][m];
+  int (*q)[n][m];
+  return q - p;
+}
+// CHECK: cir.func dso_local @f11(
+// CHECK: %[[N_ADDR:.*]] = cir.alloca !s32i
+// CHECK: %[[M_ADDR:.*]] = cir.alloca !s32i
+// CHECK: %[[P_ADDR:.*]] = cir.alloca !cir.ptr
+// CHECK: %[[Q_ADDR:.*]] = cir.alloca !cir.ptr
+
+// CHECK: %[[N:.*]] = cir.load{{.*}} %[[N_ADDR]] : !cir.ptr, !s32i
+// CHECK: %[[N_U64:.*]] = cir.cast integral %[[N]] : !s32i -> !u64i
+
+// CHECK: %[[M:.*]] = cir.load{{.*}} %[[M_ADDR]] : !cir.ptr, !s32i
+// CHECK: %[[M_U64:.*]] = cir.cast integral %[[M]] : !s32i -> !u64i
+
+// CHECK: %[[Q:.*]] = cir.load{{.*}} %[[Q_ADDR]]
+// CHECK: %[[P:.*]] = cir.load{{.*}} %[[P_ADDR]]
+
+// CHECK: %[[DIFF:.*]] = cir.ptr_diff %[[Q]], %[[P]] : !cir.ptr -> !s64i
+
+// CHECK: %[[NM_UL:.*]] = cir.binop(mul, %[[N_U64]], %[[M_U64]]) : !u64i
+// CHECK: %[[NM:.*]] = cir.cast integral %[[NM_UL]] : !u64i -> !s64i
+
+// CHECK: %[[ELTSIZE:.*]] = cir.const #cir.int<4> : !s64i
+// CHECK: %[[TOTALSIZE:.*]] = cir.binop(mul, %[[ELTSIZE]], %[[NM]]) nuw : !s64i
+
+// CHECK: %[[RESULT:.*]] = cir.binop(div, %[[DIFF]], %[[TOTALSIZE]]) : !s64i