From d2bccf721dcb4ba0b1b94284a8756febd6f43848 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu
Date: Fri, 8 Nov 2024 09:45:09 +0800
Subject: [PATCH 1/2] [CIR] [Lowering] [X86_64] Support VAArg in shape

---
 clang/include/clang/CIR/ABIArgInfo.h          |   2 +
 .../CIR/Dialect/Builder/CIRBaseBuilder.h      |  11 +
 .../Dialect/Transforms/LoweringPrepare.cpp    |   9 +-
 .../Transforms/LoweringPrepareCXXABI.h        |   1 +
 .../Transforms/LoweringPrepareX86ABI.h        |   0
 .../Transforms/TargetLowering/ABIInfoImpl.cpp |   7 +
 .../Transforms/TargetLowering/ABIInfoImpl.h   |   1 +
 .../Transforms/TargetLowering/CIRCXXABI.h     |  20 -
 .../TargetLowering/CIRLowerContext.cpp        |  12 +
 .../Transforms/TargetLowering/CMakeLists.txt  |   1 +
 .../TargetLowering/ItaniumCXXABI.cpp          |   1 +
 .../Targets/LoweringPrepareX86CXXABI.cpp      | 362 ++++++++++++++++++
 .../Transforms/TargetLowering/Targets/X86.cpp |  90 +----
 .../TargetLowering/Targets/X86_64ABIInfo.h    |  97 +++++
 clang/test/CIR/CodeGen/abstract-cond.c        |   2 -
 clang/test/CIR/Lowering/var-arg-x86_64.c      |  40 ++
 16 files changed, 544 insertions(+), 112 deletions(-)
 create mode 100644 clang/lib/CIR/Dialect/Transforms/LoweringPrepareX86ABI.h
 create mode 100644 clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp
 create mode 100644 clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h
 create mode 100644 clang/test/CIR/Lowering/var-arg-x86_64.c

diff --git a/clang/include/clang/CIR/ABIArgInfo.h b/clang/include/clang/CIR/ABIArgInfo.h
index 818d3b62f13f..ad261d13c5c7 100644
--- a/clang/include/clang/CIR/ABIArgInfo.h
+++ b/clang/include/clang/CIR/ABIArgInfo.h
@@ -254,6 +254,8 @@ class ABIArgInfo {
   bool isExpand() const { return TheKind == Expand; }
   bool isCoerceAndExpand() const { return TheKind == CoerceAndExpand; }
 
+  bool isIgnore() const { return TheKind == Ignore; }
+
   bool isSignExt() const {
     assert(isExtend() && "Invalid kind!");
     return SignExt;
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 49f1256db284..4df769481d13 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -49,6 +49,17 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
                           getAttr<mlir::cir::IntAttr>(ty, val));
   }
 
+  mlir::Value getSignedInt(mlir::Location loc, int64_t val, unsigned numBits) {
+    return getConstAPSInt(
+        loc, llvm::APSInt(llvm::APInt(numBits, val), /*isUnsigned=*/false));
+  }
+
+  mlir::Value getUnsignedInt(mlir::Location loc, uint64_t val,
+                             unsigned numBits) {
+    return getConstAPSInt(
+        loc, llvm::APSInt(llvm::APInt(numBits, val), /*isUnsigned=*/true));
+  }
+
   mlir::Value getConstAPInt(mlir::Location loc, mlir::Type typ,
                             const llvm::APInt &val) {
     return create<mlir::cir::ConstantOp>(loc, typ,
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index b11a028cbc2f..df7c5c575227 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -126,9 +126,16 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
   void setASTContext(clang::ASTContext *c) {
     astCtx = c;
-    auto abiStr = c->getTargetInfo().getABI();
+    const clang::TargetInfo &target = c->getTargetInfo();
+    auto abiStr = target.getABI();
     switch (c->getCXXABIKind()) {
     case clang::TargetCXXABI::GenericItanium:
+      if (target.getTriple().getArch() == llvm::Triple::x86_64) {
+        cxxABI.reset(
+            ::cir::LoweringPrepareCXXABI::createX86ABI(/*is64bit=*/true));
+        break;
+      }
+
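+      // Only the 64-bit lowering is implemented so far (lowerVAArgX86_32 is
+      // still a stub), so 32-bit x86 keeps the generic Itanium lowering
+      // below.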
       cxxABI.reset(::cir::LoweringPrepareCXXABI::createItaniumABI());
       break;
     case clang::TargetCXXABI::GenericAArch64:
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepareCXXABI.h b/clang/lib/CIR/Dialect/Transforms/LoweringPrepareCXXABI.h
index 42e8917b43b6..3c252ba336a7 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepareCXXABI.h
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepareCXXABI.h
@@ -28,6 +28,7 @@ class LoweringPrepareCXXABI {
 public:
   static LoweringPrepareCXXABI *createItaniumABI();
   static LoweringPrepareCXXABI *createAArch64ABI(::cir::AArch64ABIKind k);
+  static LoweringPrepareCXXABI *createX86ABI(bool is64Bit);
 
   virtual mlir::Value lowerVAArg(CIRBaseBuilderTy &builder,
                                  mlir::cir::VAArgOp op,
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepareX86ABI.h b/clang/lib/CIR/Dialect/Transforms/LoweringPrepareX86ABI.h
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp
index 493ddffdce3d..ff199f0cc189 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp
@@ -54,5 +54,12 @@ CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT,
   return CXXABI.getRecordArgABI(RT);
 }
 
+CIRCXXABI::RecordArgABI getRecordArgABI(mlir::Type ty, CIRCXXABI &CXXABI) {
+  auto sTy = mlir::dyn_cast<StructType>(ty);
+  if (!sTy)
+    return CIRCXXABI::RAA_Default;
+  return getRecordArgABI(sTy, CXXABI);
+}
+
 } // namespace cir
 } // namespace mlir
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h
index 9e45bc4e0ecc..8873aa7a49aa 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h
@@ -31,6 +31,7 @@ bool isAggregateTypeForABI(Type T);
 Type useFirstFieldIfTransparentUnion(Type Ty);
 CIRCXXABI::RecordArgABI getRecordArgABI(const StructType RT,
                                         CIRCXXABI &CXXABI);
+CIRCXXABI::RecordArgABI getRecordArgABI(mlir::Type ty, CIRCXXABI &CXXABI);
 
 } // namespace cir
 } // namespace mlir
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.h
index 42e666999005..331d4dadffcc 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.h
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.h
@@ -68,24 +68,4 @@ CIRCXXABI *CreateItaniumCXXABI(LowerModule &CGM);
 } // namespace cir
 } // namespace mlir
 
-// FIXME(cir): Merge this into the CIRCXXABI class above. To do so, this code
-// should be updated to follow some level of codegen parity.
-namespace cir {
-
-class LoweringPrepareCXXABI {
-public:
-  static LoweringPrepareCXXABI *createItaniumABI();
-  static LoweringPrepareCXXABI *createAArch64ABI(::cir::AArch64ABIKind k);
-
-  virtual mlir::Value lowerVAArg(CIRBaseBuilderTy &builder,
-                                 mlir::cir::VAArgOp op,
-                                 const cir::CIRDataLayout &datalayout) = 0;
-  virtual ~LoweringPrepareCXXABI() {}
-
-  virtual mlir::Value lowerDynamicCast(CIRBaseBuilderTy &builder,
-                                       clang::ASTContext &astCtx,
-                                       mlir::cir::DynamicCastOp op) = 0;
-};
-} // namespace cir
-
 #endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_CIRCXXABI_H
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRLowerContext.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRLowerContext.cpp
index c4912c651d21..122d7273f2fc 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRLowerContext.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRLowerContext.cpp
@@ -94,6 +94,18 @@ clang::TypeInfo CIRLowerContext::getTypeInfoImpl(const Type T) const {
       Align = Target->getDoubleAlign();
       break;
     }
+    if (auto longDoubleTy = dyn_cast<LongDoubleType>(T)) {
+      if (getLangOpts().OpenMP && getLangOpts().OpenMPIsTargetDevice &&
+          (Target->getLongDoubleWidth() != AuxTarget->getLongDoubleWidth() ||
+           Target->getLongDoubleAlign() != AuxTarget->getLongDoubleAlign())) {
+        Width = AuxTarget->getLongDoubleWidth();
+        Align = AuxTarget->getLongDoubleAlign();
+      } else {
+        Width = Target->getLongDoubleWidth();
+        Align = Target->getLongDoubleAlign();
+      }
+      break;
+    }
     cir_cconv_unreachable("Unknown builtin type!");
     break;
   }
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt
index 218656c3b144..d3cb9fc96f1a 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt
@@ -17,6 +17,7 @@ add_clang_library(TargetLowering
   Targets/X86.cpp
   Targets/LoweringPrepareAArch64CXXABI.cpp
   Targets/LoweringPrepareItaniumCXXABI.cpp
+  Targets/LoweringPrepareX86CXXABI.cpp
 
   DEPENDS
   clangBasic
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ItaniumCXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ItaniumCXXABI.cpp
index 87a1c5061aef..7fdf19f01cf1 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ItaniumCXXABI.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ItaniumCXXABI.cpp
@@ -20,6 +20,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "../LoweringPrepareCXXABI.h"
 #include "CIRCXXABI.h"
 #include "LowerModule.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp
new file mode 100644
index 000000000000..79942be0aecd
--- /dev/null
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp
@@ -0,0 +1,362 @@
+//====- LoweringPrepareX86CXXABI.cpp - X86 ABI specific code ----------====//
+//
+// Part of the LLVM Project,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===------------------------------------------------------------------===//
+//
+// This file provides X86{_64, _32} C++ ABI specific code that is used during
+// LLVMIR lowering prepare.
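+// The entry point is lowerVAArg, which expands cir.va_arg following the
+// AMD64-ABI 3.5.7 algorithm: classify the type, try the register save
+// area, and fall back to overflow_arg_area otherwise.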
+//
+//===------------------------------------------------------------------===//
+
+#include "../LowerModule.h"
+#include "../LoweringPrepareItaniumCXXABI.h"
+#include "X86_64ABIInfo.h"
+
+using namespace clang;
+using namespace cir;
+
+namespace {
+class LoweringPrepareX86CXXABI : public LoweringPrepareItaniumCXXABI {
+  bool is64;
+
+public:
+  LoweringPrepareX86CXXABI(bool is64) : is64(is64) {}
+  mlir::Value lowerVAArg(cir::CIRBaseBuilderTy &builder, mlir::cir::VAArgOp op,
+                         const cir::CIRDataLayout &datalayout) override {
+    if (is64)
+      return lowerVAArgX86_64(builder, op, datalayout);
+
+    return lowerVAArgX86_32(builder, op, datalayout);
+  }
+
+  mlir::Value lowerVAArgX86_64(cir::CIRBaseBuilderTy &builder,
+                               mlir::cir::VAArgOp op,
+                               const cir::CIRDataLayout &datalayout);
+  mlir::Value lowerVAArgX86_32(cir::CIRBaseBuilderTy &builder,
+                               mlir::cir::VAArgOp op,
+                               const cir::CIRDataLayout &datalayout) {
+    llvm_unreachable("lowerVAArg for X86_32 not implemented yet");
+  }
+};
+
+std::unique_ptr<mlir::cir::LowerModule> getLowerModule(mlir::cir::VAArgOp op) {
+  mlir::ModuleOp mo = op->getParentOfType<mlir::ModuleOp>();
+  if (!mo)
+    return nullptr;
+
+  mlir::PatternRewriter rewriter(mo.getContext());
+  return mlir::cir::createLowerModule(mo, rewriter);
+}
+
+mlir::Value buildX86_64VAArgFromMemory(cir::CIRBaseBuilderTy &builder,
+                                       const cir::CIRDataLayout &datalayout,
+                                       mlir::Value valist, mlir::Type Ty,
+                                       mlir::Location loc) {
+  mlir::Value overflow_arg_area_p =
+      builder.createGetMemberOp(loc, valist, "overflow_arg_area", 2);
+  mlir::Value overflow_arg_area = builder.createLoad(loc, overflow_arg_area_p);
+
+  // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
+  // byte boundary if alignment needed by type exceeds 8 byte boundary.
+  // It isn't stated explicitly in the standard, but in practice we use
+  // alignment greater than 16 where necessary.
+  unsigned alignment = datalayout.getABITypeAlign(Ty).value() / 8;
+  if (alignment > 8)
+    // overflow_arg_area = emitRoundPointerUpToAlignment(builder,
+    //                                                   overflow_arg_area,
+    //                                                   alignment);
+    llvm_unreachable("NYI");
+
+  // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
+  mlir::Value res = overflow_arg_area;
+
+  // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
+  // l->overflow_arg_area + sizeof(type).
+  // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
+  // an 8 byte boundary.
+  uint64_t sizeInBytes = datalayout.getTypeStoreSize(Ty).getFixedValue();
+  mlir::Value stride = builder.getSignedInt(loc, ((sizeInBytes + 7) & ~7), 32);
+  mlir::Value castedPtr =
+      builder.createPtrBitcast(overflow_arg_area, builder.getSIntNTy(8));
+  overflow_arg_area = builder.createPtrStride(loc, castedPtr, stride);
+  builder.createStore(loc, overflow_arg_area, overflow_arg_area_p);
+
+  return res;
+}
+
+mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
+    cir::CIRBaseBuilderTy &builder, mlir::cir::VAArgOp op,
+    const cir::CIRDataLayout &datalayout) {
+  using namespace mlir::cir;
+
+  // FIXME: return early since X86_64ABIInfo::classify can't handle these
+  // types. Let's hope LLVM's va_arg instruction can take care of it.
+  // Remove this when X86_64ABIInfo::classify can take care of every type.
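+  // (Pointer, vector, and _Complex arguments, for instance, still take this
+  // early-return path today.)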
+  if (!mlir::isa<IntType, SingleType, DoubleType, BoolType,
+                 StructType>(op.getType()))
+    return nullptr;
+
+  // Assume that va_list type is correct; should be pointer to LLVM type:
+  // struct {
+  //   i32 gp_offset;
+  //   i32 fp_offset;
+  //   i8* overflow_arg_area;
+  //   i8* reg_save_area;
+  // };
+  unsigned neededInt, neededSSE;
+
+  std::unique_ptr<mlir::cir::LowerModule> lowerModule = getLowerModule(op);
+  if (!lowerModule)
+    return nullptr;
+
+  mlir::Type ty = op.getType();
+
+  // FIXME: How should we access the X86AVXABILevel?
+  X86_64ABIInfo abiInfo(lowerModule->getTypes(), X86AVXABILevel::None);
+  ABIArgInfo ai = abiInfo.classifyArgumentType(
+      ty, 0, neededInt, neededSSE, /*isNamedArg=*/false, /*IsRegCall=*/false);
+
+  // Empty records are ignored for parameter passing purposes.
+  if (ai.isIgnore())
+    return nullptr;
+
+  mlir::Location loc = op.getLoc();
+  mlir::Value valist = op.getOperand();
+
+  // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
+  // in the registers. If not go to step 7.
+  if (!neededInt && !neededSSE)
+    return builder.createLoad(
+        loc,
+        builder.createPtrBitcast(buildX86_64VAArgFromMemory(
+                                     builder, datalayout, valist, ty, loc),
+                                 ty));
+
+  auto currentBlock = builder.getInsertionBlock();
+
+  // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
+  // general purpose registers needed to pass type and num_fp to hold
+  // the number of floating point registers needed.
+
+  // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
+  // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
+  // l->fp_offset > 304 - num_fp * 16 go to step 7.
+  //
+  // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of
+  // register save space.
+
+  mlir::Value inRegs;
+  mlir::Value gp_offset_p, fp_offset_p;
+  mlir::Value gp_offset, fp_offset;
+
+  if (neededInt) {
+    gp_offset_p = builder.createGetMemberOp(loc, valist, "gp_offset", 0);
+    gp_offset = builder.createLoad(loc, gp_offset_p);
+    inRegs = builder.getUnsignedInt(loc, 48 - neededInt * 8, 32);
+    inRegs = builder.createCompare(loc, mlir::cir::CmpOpKind::le, gp_offset,
+                                   inRegs);
+  }
+
+  if (neededSSE) {
+    fp_offset_p = builder.createGetMemberOp(loc, valist, "fp_offset", 1);
+    fp_offset = builder.createLoad(loc, fp_offset_p);
+    mlir::Value fitsInFP =
+        builder.getUnsignedInt(loc, 176 - neededSSE * 16, 32);
+    fitsInFP = builder.createCompare(loc, mlir::cir::CmpOpKind::le, fp_offset,
+                                     fitsInFP);
+    inRegs = inRegs ? builder.createAnd(inRegs, fitsInFP) : fitsInFP;
+  }
+
+  mlir::Block *contBlock = currentBlock->splitBlock(op);
+  mlir::Block *inRegBlock = builder.createBlock(contBlock);
+  mlir::Block *inMemBlock = builder.createBlock(contBlock);
+
+  builder.setInsertionPointToEnd(currentBlock);
+  builder.create<mlir::cir::BrCondOp>(loc, inRegs, inRegBlock, inMemBlock);
+
+  // Emit code to load the value if it was passed in registers.
+  builder.setInsertionPointToStart(inRegBlock);
+
+  // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
+  // an offset of l->gp_offset and/or l->fp_offset. This may require
+  // copying to a temporary location in case the parameter is passed
+  // in different register classes or requires an alignment greater
+  // than 8 for general purpose registers and 16 for XMM registers.
+  //
+  // FIXME: This really results in shameful code when we end up needing to
+  // collect arguments from different places; often what should result in a
+  // simple assembling of a structure from scattered addresses has many more
+  // loads than necessary. Can we clean this up?
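+  //
+  // Register save area layout (AMD64 psABI), assuming the prologue spilled
+  // all argument registers:
+  //   bytes   0..47  : rdi, rsi, rdx, rcx, r8, r9  (6 x 8 bytes)
+  //   bytes  48..175 : xmm0..xmm7                  (8 x 16 bytes)
+  // gp_offset and fp_offset above are byte offsets into this area.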
+  mlir::Value regSaveArea = builder.createLoad(
+      loc, builder.createGetMemberOp(loc, valist, "reg_save_area", 3));
+  mlir::Value regAddr;
+
+  uint64_t tyAlign = datalayout.getABITypeAlign(ty).value();
+  // The alignment of result address.
+  uint64_t alignment = 0;
+  if (neededInt && neededSSE) {
+    // FIXME: Cleanup.
+    assert(ai.isDirect() && "Unexpected ABI info for mixed regs");
+    StructType structTy = mlir::cast<StructType>(ai.getCoerceToType());
+    mlir::cir::PointerType addrTy = builder.getPointerTo(ty);
+
+    mlir::Value tmp = builder.createAlloca(loc, addrTy, ty, "tmp",
+                                           CharUnits::fromQuantity(tyAlign));
+    tmp = builder.createPtrBitcast(tmp, structTy);
+    assert(structTy.getNumElements() == 2 &&
+           "Unexpected ABI info for mixed regs");
+    mlir::Type tyLo = structTy.getMembers()[0];
+    mlir::Type tyHi = structTy.getMembers()[1];
+    assert((isFPOrFPVectorTy(tyLo) ^ isFPOrFPVectorTy(tyHi)) &&
+           "Unexpected ABI info for mixed regs");
+    mlir::Value gpAddr = builder.createPtrStride(loc, regSaveArea, gp_offset);
+    mlir::Value fpAddr = builder.createPtrStride(loc, regSaveArea, fp_offset);
+    mlir::Value regLoAddr = isFPOrFPVectorTy(tyLo) ? fpAddr : gpAddr;
+    mlir::Value regHiAddr = isFPOrFPVectorTy(tyHi) ? gpAddr : fpAddr;
+
+    // Copy the first element.
+    // FIXME: Our choice of alignment here and below is probably pessimistic.
+    mlir::Value v = builder.createAlignedLoad(
+        loc, regLoAddr, datalayout.getABITypeAlign(tyLo).value());
+    builder.createStore(loc, v,
+                        builder.createGetMemberOp(loc, tmp, "gp_offset", 0));
+
+    // Copy the second element.
+    v = builder.createAlignedLoad(loc, regHiAddr,
+                                  datalayout.getABITypeAlign(tyHi).value());
+    builder.createStore(loc, v,
+                        builder.createGetMemberOp(loc, tmp, "fp_offset", 1));
+
+    tmp = builder.createPtrBitcast(tmp, ty);
+    regAddr = tmp;
+  } else if (neededInt || neededSSE == 1) {
+    uint64_t tySize = datalayout.getTypeStoreSize(ty).getFixedValue();
+
+    mlir::Type coTy;
+    if (ai.isDirect())
+      coTy = ai.getCoerceToType();
+
+    mlir::Value gpOrFpOffset = neededInt ? gp_offset : fp_offset;
+    alignment = neededInt ? 8 : 16;
+    uint64_t regSize = neededInt ? neededInt * 8 : 16;
+    // There are two cases that require special handling:
+    // 1)
+    //    ```
+    //    struct {
+    //      struct {} a[8];
+    //      int b;
+    //    };
+    //    ```
+    //    The lower 8 bytes of the structure are not stored,
+    //    so an 8-byte offset is needed when accessing the structure.
+    // 2)
+    //    ```
+    //    struct {
+    //      long long a;
+    //      struct {} b;
+    //    };
+    //    ```
+    //    The stored size of this structure is smaller than its actual size,
+    //    which may lead to reading past the end of the register save area.
+    if (coTy && (ai.getDirectOffset() == 8 || regSize < tySize)) {
+      mlir::cir::PointerType addrTy = builder.getPointerTo(ty);
+      mlir::Value tmp = builder.createAlloca(loc, addrTy, ty, "tmp",
+                                             CharUnits::fromQuantity(tyAlign));
+      mlir::Value addr =
+          builder.createPtrStride(loc, regSaveArea, gpOrFpOffset);
+      mlir::Value src = builder.createAlignedLoad(
+          loc, builder.createPtrBitcast(addr, coTy), tyAlign);
+      mlir::Value ptrOffset =
+          builder.getUnsignedInt(loc, ai.getDirectOffset(), 32);
+      mlir::Value dst = builder.createPtrStride(loc, tmp, ptrOffset);
+      builder.createStore(loc, src, dst);
+      regAddr = tmp;
+    } else {
+      regAddr = builder.createPtrStride(loc, regSaveArea, gpOrFpOffset);
+
+      // Copy into a temporary if the type is more aligned than the
+      // register save area.
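+      // (For example, a struct carrying __attribute__((aligned(16))) that
+      // is still classified INTEGER: its GP slot is only 8-byte aligned.)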
+      if (neededInt && tyAlign > 8) {
+        mlir::cir::PointerType addrTy = builder.getPointerTo(ty);
+        mlir::Value tmp = builder.createAlloca(
+            loc, addrTy, ty, "tmp", CharUnits::fromQuantity(tyAlign));
+        builder.createMemCpy(loc, tmp, regAddr,
+                             builder.getUnsignedInt(loc, tySize, 32));
+        regAddr = tmp;
+      }
+    }
+
+  } else {
+    assert(neededSSE == 2 && "Invalid number of needed registers!");
+    // SSE registers are spaced 16 bytes apart in the register save
+    // area, we need to collect the two eightbytes together.
+    // The ABI isn't explicit about this, but it seems reasonable
+    // to assume that the slots are 16-byte aligned, since the stack is
+    // naturally 16-byte aligned and the prologue is expected to store
+    // all the SSE registers to the RSA.
+
+    mlir::Value regAddrLo =
+        builder.createPtrStride(loc, regSaveArea, fp_offset);
+    mlir::Value regAddrHi = builder.createPtrStride(
+        loc, regAddrLo, builder.getUnsignedInt(loc, 16, /*numBits=*/32));
+
+    mlir::MLIRContext *Context = abiInfo.getContext().getMLIRContext();
+    StructType structTy =
+        ai.canHaveCoerceToType()
+            ? cast<StructType>(ai.getCoerceToType())
+            : StructType::get(
+                  Context,
+                  {DoubleType::get(Context), DoubleType::get(Context)},
+                  /*packed=*/false, StructType::Struct);
+    mlir::cir::PointerType addrTy = builder.getPointerTo(ty);
+    mlir::Value tmp = builder.createAlloca(loc, addrTy, ty, "tmp",
+                                           CharUnits::fromQuantity(tyAlign));
+    tmp = builder.createPtrBitcast(tmp, structTy);
+    mlir::Value v = builder.createLoad(
+        loc, builder.createPtrBitcast(regAddrLo, structTy.getMembers()[0]));
+    builder.createStore(loc, v, builder.createGetMemberOp(loc, tmp, "", 0));
+    v = builder.createLoad(
+        loc, builder.createPtrBitcast(regAddrHi, structTy.getMembers()[1]));
+    builder.createStore(loc, v, builder.createGetMemberOp(loc, tmp, "", 1));
+
+    tmp = builder.createPtrBitcast(tmp, ty);
+    regAddr = tmp;
+  }
+
+  // AMD64-ABI 3.5.7p5: Step 5. Set:
+  // l->gp_offset = l->gp_offset + num_gp * 8
+  // l->fp_offset = l->fp_offset + num_fp * 16.
+  if (neededInt) {
+    mlir::Value offset = builder.getUnsignedInt(loc, neededInt * 8, 32);
+    builder.createStore(loc, builder.createAdd(gp_offset, offset),
+                        gp_offset_p);
+  }
+
+  if (neededSSE) {
+    mlir::Value offset = builder.getUnsignedInt(loc, neededSSE * 8, 32);
+    builder.createStore(loc, builder.createAdd(fp_offset, offset),
+                        fp_offset_p);
+  }
+
+  builder.create<mlir::cir::BrOp>(loc, mlir::ValueRange{regAddr}, contBlock);
+
+  // Emit code to load the value if it was passed in memory.
+  builder.setInsertionPointToStart(inMemBlock);
+  mlir::Value memAddr =
+      buildX86_64VAArgFromMemory(builder, datalayout, valist, ty, loc);
+  builder.create<mlir::cir::BrOp>(loc, mlir::ValueRange{memAddr}, contBlock);
+
+  // Return the appropriate result.
+  builder.setInsertionPointToStart(contBlock);
+  mlir::Value res_addr = contBlock->addArgument(regAddr.getType(), loc);
+
+  return alignment
+             ?
 builder.createAlignedLoad(
+                   loc, builder.createPtrBitcast(res_addr, ty), alignment)
+             : builder.createLoad(loc,
+                                  builder.createPtrBitcast(res_addr, ty));
+}
+} // namespace
+
+cir::LoweringPrepareCXXABI *
+cir::LoweringPrepareCXXABI::createX86ABI(bool is64Bit) {
+  return new LoweringPrepareX86CXXABI(is64Bit);
+}
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
index b50702a5ee68..05ad15f4ffa7 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
@@ -5,6 +5,7 @@
 #include "LowerModule.h"
 #include "LowerTypes.h"
 #include "TargetInfo.h"
+#include "X86_64ABIInfo.h"
 #include "clang/CIR/ABIArgInfo.h"
 #include "clang/CIR/Dialect/IR/CIRDataLayout.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
@@ -102,95 +103,6 @@ Type getFPTypeAtOffset(Type IRType, unsigned IROffset,
 
 } // namespace
 
-class X86_64ABIInfo : public ABIInfo {
-  using Class = ::cir::X86ArgClass;
-
-  /// Implement the X86_64 ABI merging algorithm.
-  ///
-  /// Merge an accumulating classification \arg Accum with a field
-  /// classification \arg Field.
-  ///
-  /// \param Accum - The accumulating classification. This should
-  /// always be either NoClass or the result of a previous merge
-  /// call. In addition, this should never be Memory (the caller
-  /// should just return Memory for the aggregate).
-  static Class merge(Class Accum, Class Field);
-
-  /// Implement the X86_64 ABI post merging algorithm.
-  ///
-  /// Post merger cleanup, reduces a malformed Hi and Lo pair to
-  /// final MEMORY or SSE classes when necessary.
-  ///
-  /// \param AggregateSize - The size of the current aggregate in
-  /// the classification process.
-  ///
-  /// \param Lo - The classification for the parts of the type
-  /// residing in the low word of the containing object.
-  ///
-  /// \param Hi - The classification for the parts of the type
-  /// residing in the higher words of the containing object.
-  ///
-  void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;
-
-  /// Determine the x86_64 register classes in which the given type T should be
-  /// passed.
-  ///
-  /// \param Lo - The classification for the parts of the type
-  /// residing in the low word of the containing object.
-  ///
-  /// \param Hi - The classification for the parts of the type
-  /// residing in the high word of the containing object.
-  ///
-  /// \param OffsetBase - The bit offset of this type in the
-  /// containing object. Some parameters are classified different
-  /// depending on whether they straddle an eightbyte boundary.
-  ///
-  /// \param isNamedArg - Whether the argument in question is a "named"
-  /// argument, as used in AMD64-ABI 3.5.7.
-  ///
-  /// \param IsRegCall - Whether the calling conversion is regcall.
-  ///
-  /// If a word is unused its result will be NoClass; if a type should
-  /// be passed in Memory then at least the classification of \arg Lo
-  /// will be Memory.
-  ///
-  /// The \arg Lo class will be NoClass iff the argument is ignored.
-  ///
-  /// If the \arg Lo class is ComplexX87, then the \arg Hi class will
-  /// also be ComplexX87.
-  void classify(Type T, uint64_t OffsetBase, Class &Lo, Class &Hi,
-                bool isNamedArg, bool IsRegCall = false) const;
-
-  Type GetSSETypeAtOffset(Type IRType, unsigned IROffset, Type SourceTy,
-                          unsigned SourceOffset) const;
-
-  Type GetINTEGERTypeAtOffset(Type DestTy, unsigned IROffset, Type SourceTy,
-                              unsigned SourceOffset) const;
-
-  /// The 0.98 ABI revision clarified a lot of ambiguities,
-  /// unfortunately in ways that were not always consistent with
-  /// certain previous compilers. In particular, platforms which
-  /// required strict binary compatibility with older versions of GCC
-  /// may need to exempt themselves.
-  bool honorsRevision0_98() const {
-    return !getTarget().getTriple().isOSDarwin();
-  }
-
-  X86AVXABILevel AVXLevel;
-
-public:
-  X86_64ABIInfo(LowerTypes &CGT, X86AVXABILevel AVXLevel)
-      : ABIInfo(CGT), AVXLevel(AVXLevel) {}
-
-  ::cir::ABIArgInfo classifyReturnType(Type RetTy) const;
-
-  ABIArgInfo classifyArgumentType(Type Ty, unsigned freeIntRegs,
-                                  unsigned &neededInt, unsigned &neededSSE,
-                                  bool isNamedArg, bool IsRegCall) const;
-
-  void computeInfo(LowerFunctionInfo &FI) const override;
-};
-
 class X86_64TargetLoweringInfo : public TargetLoweringInfo {
 public:
   X86_64TargetLoweringInfo(LowerTypes &LM, X86AVXABILevel AVXLevel)
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h
new file mode 100644
index 000000000000..0955d204d3a1
--- /dev/null
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h
@@ -0,0 +1,97 @@
+#include "ABIInfo.h"
+#include "clang/CIR/Target/x86.h"
+
+namespace mlir {
+namespace cir {
+class X86_64ABIInfo : public ABIInfo {
+  using Class = ::cir::X86ArgClass;
+
+  /// Implement the X86_64 ABI merging algorithm.
+  ///
+  /// Merge an accumulating classification \arg Accum with a field
+  /// classification \arg Field.
+  ///
+  /// \param Accum - The accumulating classification. This should
+  /// always be either NoClass or the result of a previous merge
+  /// call. In addition, this should never be Memory (the caller
+  /// should just return Memory for the aggregate).
+  static Class merge(Class Accum, Class Field);
+
+  /// Implement the X86_64 ABI post merging algorithm.
+  ///
+  /// Post merger cleanup, reduces a malformed Hi and Lo pair to
+  /// final MEMORY or SSE classes when necessary.
+  ///
+  /// \param AggregateSize - The size of the current aggregate in
+  /// the classification process.
+  ///
+  /// \param Lo - The classification for the parts of the type
+  /// residing in the low word of the containing object.
+  ///
+  /// \param Hi - The classification for the parts of the type
+  /// residing in the higher words of the containing object.
+  ///
+  void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;
+
+  /// Determine the x86_64 register classes in which the given type T should
+  /// be passed.
+  ///
+  /// \param Lo - The classification for the parts of the type
+  /// residing in the low word of the containing object.
+  ///
+  /// \param Hi - The classification for the parts of the type
+  /// residing in the high word of the containing object.
+  ///
+  /// \param OffsetBase - The bit offset of this type in the
+  /// containing object. Some parameters are classified differently
+  /// depending on whether they straddle an eightbyte boundary.
+  ///
+  /// \param isNamedArg - Whether the argument in question is a "named"
+  /// argument, as used in AMD64-ABI 3.5.7.
+  ///
+  /// \param IsRegCall - Whether the calling convention is regcall.
+  ///
+  /// If a word is unused its result will be NoClass; if a type should
+  /// be passed in Memory then at least the classification of \arg Lo
+  /// will be Memory.
+  ///
+  /// The \arg Lo class will be NoClass iff the argument is ignored.
+  ///
+  /// If the \arg Lo class is ComplexX87, then the \arg Hi class will
+  /// also be ComplexX87.
+  void classify(Type T, uint64_t OffsetBase, Class &Lo, Class &Hi,
+                bool isNamedArg, bool IsRegCall = false) const;
+
+  Type GetSSETypeAtOffset(Type IRType, unsigned IROffset, Type SourceTy,
+                          unsigned SourceOffset) const;
+
+  Type GetINTEGERTypeAtOffset(Type DestTy, unsigned IROffset, Type SourceTy,
+                              unsigned SourceOffset) const;
+
+  /// The 0.98 ABI revision clarified a lot of ambiguities,
+  /// unfortunately in ways that were not always consistent with
+  /// certain previous compilers. In particular, platforms which
+  /// required strict binary compatibility with older versions of GCC
+  /// may need to exempt themselves.
+  bool honorsRevision0_98() const {
+    return !getTarget().getTriple().isOSDarwin();
+  }
+
+  ::cir::X86AVXABILevel AVXLevel;
+
+public:
+  X86_64ABIInfo(LowerTypes &CGT, ::cir::X86AVXABILevel AVXLevel)
+      : ABIInfo(CGT), AVXLevel(AVXLevel) {}
+
+  ::cir::ABIArgInfo classifyReturnType(Type RetTy) const;
+
+  ::cir::ABIArgInfo classifyArgumentType(Type Ty, unsigned freeIntRegs,
+                                         unsigned &neededInt,
+                                         unsigned &neededSSE, bool isNamedArg,
+                                         bool IsRegCall) const;
+
+  void computeInfo(LowerFunctionInfo &FI) const override;
+};
+
+} // namespace cir
+} // namespace mlir
\ No newline at end of file
diff --git a/clang/test/CIR/CodeGen/abstract-cond.c b/clang/test/CIR/CodeGen/abstract-cond.c
index dc3df811d8f4..c736c01983ff 100644
--- a/clang/test/CIR/CodeGen/abstract-cond.c
+++ b/clang/test/CIR/CodeGen/abstract-cond.c
@@ -1,7 +1,5 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
 // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
 
 // ?: in "lvalue"
 struct s6 { int f0; };
diff --git a/clang/test/CIR/Lowering/var-arg-x86_64.c b/clang/test/CIR/Lowering/var-arg-x86_64.c
new file mode 100644
index 000000000000..f9ce354dffb2
--- /dev/null
+++ b/clang/test/CIR/Lowering/var-arg-x86_64.c
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s
+
+#include <stdarg.h>
+
+double f1(int n, ...) {
+  va_list valist;
+  va_start(valist, n);
+  double res = va_arg(valist, double);
+  va_end(valist);
+  return res;
+}
+
+// CHECK: [[VA_LIST_TYPE:%.+]] = type { i32, i32, ptr, ptr }
+
+// CHECK: define {{.*}}@f1
+// CHECK: [[VA_LIST_ALLOCA:%.+]] = alloca {{.*}}[[VA_LIST_TYPE]]
+// CHECK: [[VA_LIST:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_start.p0(ptr [[VA_LIST]])
+// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: [[FP_OFFSET_P:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 1
+// CHECK: [[FP_OFFSET:%.+]] = load {{.*}}, ptr [[FP_OFFSET_P]]
+// CHECK: [[COMPARED:%.+]] = icmp ule i32 {{.*}}, 160
+// CHECK: br i1 [[COMPARED]], label %[[THEN_BB:.+]], label %[[ELSE_BB:.+]],
+//
+// CHECK: [[THEN_BB]]:
+// CHECK: [[UPDATED_FP_OFFSET:%.+]] = add i32 [[FP_OFFSET]], 8
+// CHECK: store i32 [[UPDATED_FP_OFFSET]], ptr [[FP_OFFSET_P]]
+// CHECK: br label %[[CONT_BB:.+]],
+//
+// CHECK: [[ELSE_BB]]:
+// CHECK: [[OVERFLOW_ARG_AREA_ADDR:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 2
+// CHECK: [[OVERFLOW_ARG_AREA:%.+]] = load ptr, ptr [[OVERFLOW_ARG_AREA_ADDR]]
+// CHECK: [[OVERFLOW_ARG_AREA_OFFSET:%.+]] = getelementptr {{.*}} [[OVERFLOW_ARG_AREA]], i64 8
+// CHECK: store ptr [[OVERFLOW_ARG_AREA_OFFSET]], ptr [[OVERFLOW_ARG_AREA_ADDR]]
+// CHECK: br label %[[CONT_BB]]
+//
+// CHECK: [[CONT_BB]]:
+// CHECK: [[VA_LIST3:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST3]])

From 7d28be4e7e2479cfc1753313e98c94da252555e2 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu
Date: Thu, 7 Nov 2024 16:46:51 +0800
Subject: [PATCH 2/2] [CIR] [Lowering] [X86_64] Support VAArg for LongDouble

---
 .../Targets/LoweringPrepareX86CXXABI.cpp      |   5 +-
 .../Transforms/TargetLowering/Targets/X86.cpp | 118 +++++++++++++++++-
 .../TargetLowering/Targets/X86_64ABIInfo.h    |  10 +-
 clang/test/CIR/Lowering/var-arg-x86_64.c      |  28 +++++
 4 files changed, 153 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp
index 79942be0aecd..8d1c5527bf6f 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp
@@ -47,7 +47,6 @@ std::unique_ptr<mlir::cir::LowerModule> getLowerModule(mlir::cir::VAArgOp op) {
   mlir::ModuleOp mo = op->getParentOfType<mlir::ModuleOp>();
   if (!mo)
     return nullptr;
-
   mlir::PatternRewriter rewriter(mo.getContext());
   return mlir::cir::createLowerModule(mo, rewriter);
 }
@@ -96,7 +95,7 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
   // Let's hope LLVM's va_arg instruction can take care of it.
   // Remove this when X86_64ABIInfo::classify can take care of every type.
   if (!mlir::isa<IntType, SingleType, DoubleType, BoolType,
-                 StructType>(op.getType()))
+                 StructType, LongDoubleType>(op.getType()))
     return nullptr;
 
   // Assume that va_list type is correct; should be pointer to LLVM type:
@@ -111,7 +110,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
   std::unique_ptr<mlir::cir::LowerModule> lowerModule = getLowerModule(op);
   if (!lowerModule)
     return nullptr;
-
   mlir::Type ty = op.getType();
 
   // FIXME: How should we access the X86AVXABILevel?
@@ -172,7 +170,6 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
   mlir::Block *contBlock = currentBlock->splitBlock(op);
   mlir::Block *inRegBlock = builder.createBlock(contBlock);
   mlir::Block *inMemBlock = builder.createBlock(contBlock);
-
   builder.setInsertionPointToEnd(currentBlock);
   builder.create<mlir::cir::BrCondOp>(loc, inRegs, inRegBlock, inMemBlock);
 
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
index 05ad15f4ffa7..3a5ded33e894 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp
@@ -166,6 +166,21 @@ void X86_64ABIInfo::classify(Type Ty, uint64_t OffsetBase, Class &Lo, Class &Hi,
     Current = Class::SSE;
     return;
 
+  } else if (isa<LongDoubleType>(Ty)) {
+    const llvm::fltSemantics *LDF =
+        &getContext().getTargetInfo().getLongDoubleFormat();
+    if (LDF == &llvm::APFloat::IEEEquad()) {
+      Lo = Class::SSE;
+      Hi = Class::SSEUp;
+    } else if (LDF == &llvm::APFloat::x87DoubleExtended()) {
+      Lo = Class::X87;
+      Hi = Class::X87Up;
+    } else if (LDF == &llvm::APFloat::IEEEdouble()) {
+      Current = Class::SSE;
+    } else {
+      llvm_unreachable("unexpected long double representation!");
+    }
+    return;
   } else if (isa<IntType, BoolType>(Ty)) {
     Current = Class::Integer;
   } else if (const auto RT = dyn_cast<StructType>(Ty)) {
@@ -268,6 +283,65 @@ void X86_64ABIInfo::classify(Type Ty, uint64_t OffsetBase, Class &Lo, Class &Hi,
   cir_cconv_unreachable("NYI");
 }
 
+ABIArgInfo X86_64ABIInfo::getIndirectResult(mlir::Type ty,
+                                            unsigned freeIntRegs) const {
+  // If this is a scalar LLVM value then assume LLVM will pass it in the
+  // right place naturally.
+  //
+  // This assumption is optimistic, as there could be free registers
+  // available when we need to pass this argument in memory, and LLVM could
+  // try to pass the argument in the free register. This does not seem to
+  // happen currently, but this code would be much safer if we could mark
+  // the argument with 'onstack'. See PR12193.
+  if (!isAggregateTypeForABI(ty) /* && IsIllegalVectorType(Ty) &&*/
+      /*!Ty->isBitIntType()*/) {
+    // FIXME: Handling enum type?
+
+    return (isPromotableIntegerTypeForABI(ty) ? ABIArgInfo::getExtend(ty)
+                                              : ABIArgInfo::getDirect());
+  }
+
+  if (CIRCXXABI::RecordArgABI RAA = getRecordArgABI(ty, getCXXABI()))
+    return getNaturalAlignIndirect(ty, RAA == CIRCXXABI::RAA_DirectInMemory);
+
+  // Compute the byval alignment. We specify the alignment of the byval in
+  // all cases so that the mid-level optimizer knows the alignment of the
+  // byval.
+  unsigned align = std::max(getContext().getTypeAlign(ty) / 8, 8U);
+
+  // Attempt to avoid passing indirect results using byval when possible.
+  // This is important for good codegen.
+  //
+  // We do this by coercing the value into a scalar type which the backend
+  // can handle naturally (i.e., without using byval).
+  //
+  // For simplicity, we currently only do this when we have exhausted all of
+  // the free integer registers. Doing this when there are free integer
+  // registers would require more care, as we would have to ensure that the
+  // coerced value did not claim the unused register. That would require
+  // either reordering the arguments to the function (so that any subsequent
+  // inreg values came first), or only doing this optimization when there
+  // were no following arguments that might be inreg.
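+  //
+  // For example, once all six integer registers are exhausted, a
+  // `struct { int a, b; }` (8 bytes, 8-byte aligned) is coerced to a single
+  // 64-bit integer below instead of going byval.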
+  //
+  // We currently expect it to be rare (particularly in well written code)
+  // for arguments to be passed on the stack when there are still free
+  // integer registers available (this would typically imply large structs
+  // being passed by value), so this seems like a fair tradeoff for now.
+  //
+  // We can revisit this if the backend grows support for 'onstack' parameter
+  // attributes. See PR12193.
+  if (freeIntRegs == 0) {
+    uint64_t size = getContext().getTypeSize(ty);
+
+    // If this type fits in an eightbyte, coerce it into the matching
+    // integral type, which will end up on the stack (with alignment 8).
+    if (align == 8 && size <= 64)
+      return ABIArgInfo::getDirect(
+          mlir::cir::IntType::get(LT.getMLIRContext(), size, false));
+  }
+
+  return ABIArgInfo::getIndirect(align);
+}
+
 /// Return a type that will be passed by the backend in the low 8 bytes of an
 /// XMM register, corresponding to the SSE class.
 Type X86_64ABIInfo::GetSSETypeAtOffset(Type IRType, unsigned IROffset,
@@ -278,7 +352,7 @@ Type X86_64ABIInfo::GetSSETypeAtOffset(Type IRType, unsigned IROffset,
       (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
   Type T0 = getFPTypeAtOffset(IRType, IROffset, TD);
   if (!T0 || isa<DoubleType>(T0))
-    return T0; // NOTE(cir): Not sure if this is correct.
+    return ::mlir::cir::DoubleType::get(LT.getMLIRContext());
 
   Type T1 = {};
   unsigned T0Size = TD.getTypeAllocSize(T0);
@@ -296,6 +370,8 @@ Type X86_64ABIInfo::GetSSETypeAtOffset(Type IRType, unsigned IROffset,
     return T0;
   }
 
+  return ::mlir::cir::DoubleType::get(LT.getMLIRContext());
+
   cir_cconv_unreachable("NYI");
 }
 
@@ -538,6 +614,22 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(Type Ty, unsigned freeIntRegs,
       ++neededSSE;
     break;
   }
+  // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
+  // on the stack.
+  case Class::Memory:
+
+  // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
+  // COMPLEX_X87, it is passed in memory.
+  case Class::X87:
+  case Class::ComplexX87:
+    if (getRecordArgABI(Ty, getCXXABI()) == CIRCXXABI::RAA_Indirect)
+      ++neededInt;
+    return getIndirectResult(Ty, freeIntRegs);
+
+  case Class::SSEUp:
+  case Class::X87Up:
+    llvm_unreachable("Invalid classification for lo word.");
+
   default:
     cir_cconv_assert_or_abort(
         !::cir::MissingFeatures::X86ArgTypeClassification(), "NYI");
@@ -545,6 +637,11 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(Type Ty, unsigned freeIntRegs,
 
   Type HighPart = {};
   switch (Hi) {
+  case Class::Memory:
+  case Class::X87:
+  case Class::ComplexX87:
+    llvm_unreachable("Invalid classification for hi word.");
+
   case Class::NoClass:
     break;
 
@@ -557,8 +654,23 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(Type Ty, unsigned freeIntRegs,
       return ABIArgInfo::getDirect(HighPart, 8);
     break;
 
-  default:
-    cir_cconv_unreachable("NYI");
+  // X87Up generally doesn't occur here (long double is passed in
+  // memory), except in situations involving unions.
+  case Class::X87Up:
+  case Class::SSE:
+    ++neededSSE;
+    HighPart = GetSSETypeAtOffset(Ty, 8, Ty, 8);
+
+    if (Lo == Class::NoClass) // Pass HighPart at offset 8 in memory.
+      return ABIArgInfo::getDirect(HighPart, 8);
+    break;
+
+  // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
+  // eightbyte is passed in the upper half of the last used SSE
+  // register. This only happens when 128-bit vectors are passed.
+  case Class::SSEUp:
+    llvm_unreachable("NYI && We need to implement GetByteVectorType");
+    break;
   }
 
   // If a high part was specified, merge it together with the low part.  It is
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h
index 0955d204d3a1..60b238dcd568 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h
@@ -68,6 +68,14 @@ class X86_64ABIInfo : public ABIInfo {
   Type GetINTEGERTypeAtOffset(Type DestTy, unsigned IROffset, Type SourceTy,
                               unsigned SourceOffset) const;
 
+  /// getIndirectResult - Given a source type \arg Ty, return a suitable
+  /// result such that the argument will be passed in memory.
+  ///
+  /// \param freeIntRegs - The number of free integer registers remaining
+  /// available.
+  ::cir::ABIArgInfo getIndirectResult(mlir::Type ty,
+                                      unsigned freeIntRegs) const;
+
   /// The 0.98 ABI revision clarified a lot of ambiguities,
   /// unfortunately in ways that were not always consistent with
   /// certain previous compilers. In particular, platforms which
@@ -94,4 +102,4 @@ class X86_64ABIInfo : public ABIInfo {
 };
 
 } // namespace cir
-} // namespace mlir
\ No newline at end of file
+} // namespace mlir
diff --git a/clang/test/CIR/Lowering/var-arg-x86_64.c b/clang/test/CIR/Lowering/var-arg-x86_64.c
index f9ce354dffb2..23b215175bde 100644
--- a/clang/test/CIR/Lowering/var-arg-x86_64.c
+++ b/clang/test/CIR/Lowering/var-arg-x86_64.c
@@ -38,3 +38,31 @@ double f1(int n, ...) {
 // CHECK: [[CONT_BB]]:
 // CHECK: [[VA_LIST3:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
 // CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST3]])
+
+long double f2(int n, ...) {
+  va_list valist;
+  va_start(valist, n);
+  long double res = va_arg(valist, long double);
+  va_end(valist);
+  return res;
+}
+
+// CHECK: define {{.*}}@f2
+// CHECK: [[RESULT:%.+]] = alloca x86_fp80
+// CHECK: [[VA_LIST_ALLOCA:%.+]] = alloca {{.*}}[[VA_LIST_TYPE]]
+// CHECK: [[RES:%.+]] = alloca x86_fp80
+// CHECK: [[VA_LIST:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_start.p0(ptr [[VA_LIST]])
+// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: [[OVERFLOW_AREA_P:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 2
+// CHECK: [[OVERFLOW_AREA:%.+]] = load {{.*}}, ptr [[OVERFLOW_AREA_P]]
+// CHECK: [[OVERFLOW_AREA_NEXT:%.+]] = getelementptr i8, ptr [[OVERFLOW_AREA]], i64 16
+// CHECK: store ptr [[OVERFLOW_AREA_NEXT]], ptr [[OVERFLOW_AREA_P]]
+// CHECK: [[VALUE:%.+]] = load x86_fp80, ptr [[OVERFLOW_AREA]]
+// CHECK: store x86_fp80 [[VALUE]], ptr [[RES]]
+// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST2]])
+// CHECK: [[VALUE2:%.+]] = load x86_fp80, ptr [[RES]]
+// CHECK: store x86_fp80 [[VALUE2]], ptr [[RESULT]]
+// CHECK: [[RETURN_VALUE:%.+]] = load x86_fp80, ptr [[RESULT]]
+// CHECK: ret x86_fp80 [[RETURN_VALUE]]
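
--
For reference, a minimal sketch of a caller that exercises both lowered paths
in one function (illustrative only; the function and variable names are not
part of the patch or its tests). The double argument goes through the
fp_offset/register-save-area branch, while the long double (X87 class on
x86-64 Linux) is always fetched from overflow_arg_area:

#include <stdarg.h>

long double mix(int n, ...) {
  va_list ap;
  va_start(ap, n);
  double d = va_arg(ap, double);             /* SSE class: register save area */
  long double ld = va_arg(ap, long double);  /* X87 class: overflow_arg_area */
  va_end(ap);
  return d + ld;
}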