Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 56 additions & 9 deletions clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1513,17 +1513,64 @@ mlir::Value ScalarExprEmitter::emitSub(const BinOpInfo &Ops) {
if (!mlir::isa<cir::PointerType>(Ops.RHS.getType()))
return emitPointerArithmetic(CGF, Ops, /*isSubtraction=*/true);

// Otherwise, this is a pointer subtraction

// Do the raw subtraction part.
//
// TODO(cir): note for LLVM lowering out of this; when expanding this into
// LLVM we shall take VLA's, division by element size, etc.
//
// See more in `EmitSub` in CGExprScalar.cpp.
mlir::Value lhs = Ops.LHS;
mlir::Value rhs = Ops.RHS;

cir::PointerType lhsPtrTy = mlir::dyn_cast<cir::PointerType>(lhs.getType());
cir::PointerType rhsPtrTy = mlir::dyn_cast<cir::PointerType>(rhs.getType());

if (lhsPtrTy && rhsPtrTy) {
cir::AddressSpace lhsAS = lhsPtrTy.getAddrSpace();
cir::AddressSpace rhsAS = rhsPtrTy.getAddrSpace();

if (lhsAS != rhsAS) {
// Different address spaces → use addrspacecast
rhs = Builder.createAddrSpaceCast(rhs, lhsPtrTy);
} else if (lhsPtrTy != rhsPtrTy) {
// Same addrspace but different pointee/type → bitcast is fine
rhs = Builder.createBitcast(rhs, lhsPtrTy);
}
}

assert(!cir::MissingFeatures::llvmLoweringPtrDiffConsidersPointee());
return cir::PtrDiffOp::create(Builder, CGF.getLoc(Ops.Loc), CGF.PtrDiffTy,
Ops.LHS, Ops.RHS);
mlir::Value diff = cir::PtrDiffOp::create(Builder, CGF.getLoc(Ops.Loc),
CGF.PtrDiffTy, lhs, rhs);

const BinaryOperator *expr = cast<BinaryOperator>(Ops.E);
QualType elementType = expr->getLHS()->getType()->getPointeeType();

mlir::Location loc = CGF.getLoc(Ops.Loc);
mlir::Value divisor;

// Check if this is a VLA pointee type.
if (const auto *vla = CGF.getContext().getAsVariableArrayType(elementType)) {
auto vlaSize = CGF.getVLASize(vla);
elementType = vlaSize.Type;
divisor = vlaSize.NumElts;

CharUnits eltSize = CGF.getContext().getTypeSizeInChars(elementType);
if (!eltSize.isOne()) {
cir::IntType cirIntTy = llvm::cast<cir::IntType>(CGF.PtrDiffTy);
cir::IntAttr eltSizeAttr =
cir::IntAttr::get(cirIntTy, eltSize.getQuantity());

if (divisor.getType() != CGF.PtrDiffTy)
divisor = Builder.createIntCast(divisor, CGF.PtrDiffTy);

auto eltSizeVal =
cir::ConstantOp::create(Builder, loc, cirIntTy, eltSizeAttr)
.getResult();
divisor = Builder.createNUWAMul(eltSizeVal, divisor);
}
} else {
// cir::ptrdiff correctly computes the ABI difference of 2 pointers. We
// do not need to compute anything else here. We just return it.
return diff;
}

return cir::BinOp::create(Builder, loc, CGF.PtrDiffTy, cir::BinOpKind::Div,
diff, divisor);
}

mlir::Value ScalarExprEmitter::emitShl(const BinOpInfo &Ops) {
Expand Down
60 changes: 60 additions & 0 deletions clang/test/CIR/CodeGen/HIP/ptr-diff.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#include "cuda.h"

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -I%S/../Inputs/ -emit-cir %s -o %t.ll
// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.ll %s

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s

__device__ int ptr_diff() {
const char c_str[] = "c-string";
const char* len = c_str;
return c_str - len;
}


// CIR-DEVICE: %[[#LenLocalAddr:]] = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>, ["len", init]
// CIR-DEVICE: %[[#GlobalPtr:]] = cir.get_global @_ZZ8ptr_diffvE5c_str : !cir.ptr<!cir.array<!s8i x 9>, addrspace(offload_constant)>
// CIR-DEVICE: %[[#CastDecay:]] = cir.cast array_to_ptrdecay %[[#GlobalPtr]] : !cir.ptr<!cir.array<!s8i x 9>, addrspace(offload_constant)>
// CIR-DEVICE: %[[#LenLocalAddrCast:]] = cir.cast bitcast %[[#LenLocalAddr]] : !cir.ptr<!cir.ptr<!s8i>> -> !cir.ptr<!cir.ptr<!s8i, addrspace(offload_constant)>>
// CIR-DEVICE: cir.store align(8) %[[#CastDecay]], %[[#LenLocalAddrCast]] : !cir.ptr<!s8i, addrspace(offload_constant)>, !cir.ptr<!cir.ptr<!s8i, addrspace(offload_constant)>>
// CIR-DEVICE: %[[#CStr:]] = cir.cast array_to_ptrdecay %[[#GlobalPtr]] : !cir.ptr<!cir.array<!s8i x 9>, addrspace(offload_constant)> -> !cir.ptr<!s8i, addrspace(offload_constant)>
// CIR-DEVICE: %[[#LoadedLenAddr:]] = cir.load align(8) %[[#LenLocalAddr]] : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i> loc(#loc7)
// CIR-DEVICE: %[[#AddrCast:]] = cir.cast address_space %[[#LoadedLenAddr]] : !cir.ptr<!s8i> -> !cir.ptr<!s8i, addrspace(offload_constant)>
// CIR-DEVICE: %[[#DIFF:]] = cir.ptr_diff %[[#CStr]], %[[#AddrCast]] : !cir.ptr<!s8i, addrspace(offload_constant)>

// LLVM-DEVICE: define dso_local i32 @_Z8ptr_diffv()
// LLVM-DEVICE: %[[#GlobalPtrAddr:]] = alloca i32, i64 1, align 4, addrspace(5)
// LLVM-DEVICE: %[[#GlobalPtrCast:]] = addrspacecast ptr addrspace(5) %[[#GlobalPtrAddr]] to ptr
// LLVM-DEVICE: %[[#LenLocalAddr:]] = alloca ptr, i64 1, align 8, addrspace(5)
// LLVM-DEVICE: %[[#LenLocalAddrCast:]] = addrspacecast ptr addrspace(5) %[[#LenLocalAddr]] to ptr
// LLVM-DEVICE: store ptr addrspace(4) @_ZZ8ptr_diffvE5c_str, ptr %[[#LenLocalAddrCast]], align 8
// LLVM-DEVICE: %[[#LoadedAddr:]] = load ptr, ptr %[[#LenLocalAddrCast]], align 8
// LLVM-DEVICE: %[[#CastedVal:]] = addrspacecast ptr %[[#LoadedAddr]] to ptr addrspace(4)
// LLVM-DEVICE: %[[#IntVal:]] = ptrtoint ptr addrspace(4) %[[#CastedVal]] to i64
// LLVM-DEVICE: %[[#SubVal:]] = sub i64 ptrtoint (ptr addrspace(4) @_ZZ8ptr_diffvE5c_str to i64), %[[#IntVal]]

// OGCG-DEVICE: define dso_local noundef i32 @_Z8ptr_diffv() #0
// OGCG-DEVICE: %[[RETVAL:.*]] = alloca i32, align 4, addrspace(5)
// OGCG-DEVICE: %[[C_STR:.*]] = alloca [9 x i8], align 1, addrspace(5)
// OGCG-DEVICE: %[[LEN:.*]] = alloca ptr, align 8, addrspace(5)
// OGCG-DEVICE: %[[RETVAL_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[RETVAL]] to ptr
// OGCG-DEVICE: %[[C_STR_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[C_STR]] to ptr
// OGCG-DEVICE: %[[LEN_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[LEN]] to ptr
// OGCG-DEVICE: %[[ARRAYDECAY:.*]] = getelementptr inbounds [9 x i8], ptr %[[C_STR_ASCAST]], i64 0, i64 0
// OGCG-DEVICE: store ptr %[[ARRAYDECAY]], ptr %[[LEN_ASCAST]], align 8
// OGCG-DEVICE: %[[ARRAYDECAY1:.*]] = getelementptr inbounds [9 x i8], ptr %[[C_STR_ASCAST]], i64 0, i64 0
// OGCG-DEVICE: %[[LOADED:.*]] = load ptr, ptr %[[LEN_ASCAST]], align 8
// OGCG-DEVICE: %[[LHS:.*]] = ptrtoint ptr %[[ARRAYDECAY1]] to i64
// OGCG-DEVICE: %[[RHS:.*]] = ptrtoint ptr %[[LOADED]] to i64
// OGCG-DEVICE: %[[SUB:.*]] = sub i64 %[[LHS]], %[[RHS]]
// OGCG-DEVICE: %[[CONV:.*]] = trunc i64 %[[SUB]] to i32
56 changes: 56 additions & 0 deletions clang/test/CIR/CodeGen/vla.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,59 @@ int f9(unsigned n, char (*p)[n][n+1][6]) {

return p2 - p;
}

long f10(int n) {
int (*p)[n];
int (*q)[n];
return q - p;
}
// CHECK: cir.func dso_local @f10(%arg0: !s32i
// CHECK: %[[N_ADDR:.*]] = cir.alloca !s32i
// CHECK: %[[RETVAL:.*]] = cir.alloca !s64i
// CHECK: %[[P_ADDR:.*]] = cir.alloca !cir.ptr<!s32i>
// CHECK: %[[Q_ADDR:.*]] = cir.alloca !cir.ptr<!s32i>

// CHECK: %[[N:.*]] = cir.load{{.*}} %[[N_ADDR]] : !cir.ptr<!s32i>, !s32i
// CHECK: %[[N_U64:.*]] = cir.cast integral %[[N]] : !s32i -> !u64i

// CHECK: %[[Q:.*]] = cir.load{{.*}} %[[Q_ADDR]]
// CHECK: %[[P:.*]] = cir.load{{.*}} %[[P_ADDR]]

// CHECK: %[[DIFF:.*]] = cir.ptr_diff %[[Q]], %[[P]] : !cir.ptr<!s32i> -> !s64i

// CHECK: %[[N_S64:.*]] = cir.cast integral %[[N_U64]] : !u64i -> !s64i
// CHECK: %[[ELTSIZE:.*]] = cir.const #cir.int<4> : !s64i
// CHECK: %[[DIVISOR:.*]] = cir.binop(mul, %[[ELTSIZE]], %[[N_S64]]) nuw : !s64i

// CHECK: %[[RESULT:.*]] = cir.binop(div, %[[DIFF]], %[[DIVISOR]]) : !s64i


long f11(int n, int m) {
int (*p)[n][m];
int (*q)[n][m];
return q - p;
}
// CHECK: cir.func dso_local @f11(
// CHECK: %[[N_ADDR:.*]] = cir.alloca !s32i
// CHECK: %[[M_ADDR:.*]] = cir.alloca !s32i
// CHECK: %[[P_ADDR:.*]] = cir.alloca !cir.ptr<!s32i>
// CHECK: %[[Q_ADDR:.*]] = cir.alloca !cir.ptr<!s32i>

// CHECK: %[[N:.*]] = cir.load{{.*}} %[[N_ADDR]] : !cir.ptr<!s32i>, !s32i
// CHECK: %[[N_U64:.*]] = cir.cast integral %[[N]] : !s32i -> !u64i

// CHECK: %[[M:.*]] = cir.load{{.*}} %[[M_ADDR]] : !cir.ptr<!s32i>, !s32i
// CHECK: %[[M_U64:.*]] = cir.cast integral %[[M]] : !s32i -> !u64i

// CHECK: %[[Q:.*]] = cir.load{{.*}} %[[Q_ADDR]]
// CHECK: %[[P:.*]] = cir.load{{.*}} %[[P_ADDR]]

// CHECK: %[[DIFF:.*]] = cir.ptr_diff %[[Q]], %[[P]] : !cir.ptr<!s32i> -> !s64i

// CHECK: %[[NM_UL:.*]] = cir.binop(mul, %[[N_U64]], %[[M_U64]]) : !u64i
// CHECK: %[[NM:.*]] = cir.cast integral %[[NM_UL]] : !u64i -> !s64i

// CHECK: %[[ELTSIZE:.*]] = cir.const #cir.int<4> : !s64i
// CHECK: %[[TOTALSIZE:.*]] = cir.binop(mul, %[[ELTSIZE]], %[[NM]]) nuw : !s64i

// CHECK: %[[RESULT:.*]] = cir.binop(div, %[[DIFF]], %[[TOTALSIZE]]) : !s64i